xref: /llvm-project/llvm/test/CodeGen/X86/vec_minmax_uint.ll (revision be6c752e157638849f1f59f7e2b7ecbe11a022fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
9
10;
11; Unsigned Maximum (GT)
12;
13
; max_gt_v2i64 - unsigned maximum of two <2 x i64> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * AVX512 runs use a native vpmaxuq on the zmm super-registers of the
;    xmm arguments, followed by vzeroupper.
;  * All other runs emulate the unsigned compare: both inputs are XORed with
;    a sign-bit constant so a signed pcmpgt can be used, and the mask drives
;    a blend (pand/pandn/por on the SSE2 run, blendvpd elsewhere).
;  * The SSE2 and SSE4.1 runs have no 64-bit compare in the expected output;
;    they combine pcmpgtd/pcmpeqd results. Their constant
;    9223372039002259456 is 0x8000000080000000, i.e. the sign bit of each
;    32-bit half. SSE4.2/AVX runs use pcmpgtq with 0x8000000000000000.
14define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
15; SSE2-LABEL: max_gt_v2i64:
16; SSE2:       # %bb.0:
17; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
18; SSE2-NEXT:    movdqa %xmm1, %xmm3
19; SSE2-NEXT:    pxor %xmm2, %xmm3
20; SSE2-NEXT:    pxor %xmm0, %xmm2
21; SSE2-NEXT:    movdqa %xmm2, %xmm4
22; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
23; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
24; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
25; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
26; SSE2-NEXT:    pand %xmm5, %xmm2
27; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
28; SSE2-NEXT:    por %xmm2, %xmm3
29; SSE2-NEXT:    pand %xmm3, %xmm0
30; SSE2-NEXT:    pandn %xmm1, %xmm3
31; SSE2-NEXT:    por %xmm3, %xmm0
32; SSE2-NEXT:    retq
33;
34; SSE41-LABEL: max_gt_v2i64:
35; SSE41:       # %bb.0:
36; SSE41-NEXT:    movdqa %xmm0, %xmm2
37; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
38; SSE41-NEXT:    movdqa %xmm1, %xmm0
39; SSE41-NEXT:    pxor %xmm3, %xmm0
40; SSE41-NEXT:    pxor %xmm2, %xmm3
41; SSE41-NEXT:    movdqa %xmm3, %xmm4
42; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
43; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
44; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
45; SSE41-NEXT:    pand %xmm4, %xmm0
46; SSE41-NEXT:    por %xmm3, %xmm0
47; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
48; SSE41-NEXT:    movapd %xmm1, %xmm0
49; SSE41-NEXT:    retq
50;
51; SSE42-LABEL: max_gt_v2i64:
52; SSE42:       # %bb.0:
53; SSE42-NEXT:    movdqa %xmm0, %xmm2
54; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
55; SSE42-NEXT:    movdqa %xmm1, %xmm3
56; SSE42-NEXT:    pxor %xmm0, %xmm3
57; SSE42-NEXT:    pxor %xmm2, %xmm0
58; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
59; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
60; SSE42-NEXT:    movapd %xmm1, %xmm0
61; SSE42-NEXT:    retq
62;
63; AVX1-LABEL: max_gt_v2i64:
64; AVX1:       # %bb.0:
65; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
66; AVX1-NEXT:    # xmm2 = mem[0,0]
67; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
68; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
69; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
70; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
71; AVX1-NEXT:    retq
72;
73; AVX2-LABEL: max_gt_v2i64:
74; AVX2:       # %bb.0:
75; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
76; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
77; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
78; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
79; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
80; AVX2-NEXT:    retq
81;
82; AVX512-LABEL: max_gt_v2i64:
83; AVX512:       # %bb.0:
84; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
85; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
86; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
87; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
88; AVX512-NEXT:    vzeroupper
89; AVX512-NEXT:    retq
90  %1 = icmp ugt <2 x i64> %a, %b
91  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
92  ret <2 x i64> %2
93}
94
; max_gt_v4i64 - unsigned maximum of two <4 x i64> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * SSE runs process the 256-bit value as two independent xmm pairs
;    (xmm0/xmm2 and xmm1/xmm3), each using a sign-bit-flip compare and a
;    mask-based select.
;  * The AVX1 run compares each 128-bit half separately with vpcmpgtq, joins
;    the two masks with vinsertf128 and does a single ymm-wide vblendvpd.
;  * The AVX2 run does the XOR / vpcmpgtq / vblendvpd sequence directly on ymm.
;  * AVX512 runs use vpmaxuq on the zmm super-registers of the ymm arguments.
95define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
96; SSE2-LABEL: max_gt_v4i64:
97; SSE2:       # %bb.0:
98; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
99; SSE2-NEXT:    movdqa %xmm2, %xmm5
100; SSE2-NEXT:    pxor %xmm4, %xmm5
101; SSE2-NEXT:    movdqa %xmm0, %xmm6
102; SSE2-NEXT:    pxor %xmm4, %xmm6
103; SSE2-NEXT:    movdqa %xmm6, %xmm7
104; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
105; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
106; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
107; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
108; SSE2-NEXT:    pand %xmm8, %xmm5
109; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
110; SSE2-NEXT:    por %xmm5, %xmm6
111; SSE2-NEXT:    pand %xmm6, %xmm0
112; SSE2-NEXT:    pandn %xmm2, %xmm6
113; SSE2-NEXT:    por %xmm6, %xmm0
114; SSE2-NEXT:    movdqa %xmm3, %xmm2
115; SSE2-NEXT:    pxor %xmm4, %xmm2
116; SSE2-NEXT:    pxor %xmm1, %xmm4
117; SSE2-NEXT:    movdqa %xmm4, %xmm5
118; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
119; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
120; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
121; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
122; SSE2-NEXT:    pand %xmm6, %xmm2
123; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
124; SSE2-NEXT:    por %xmm2, %xmm4
125; SSE2-NEXT:    pand %xmm4, %xmm1
126; SSE2-NEXT:    pandn %xmm3, %xmm4
127; SSE2-NEXT:    por %xmm4, %xmm1
128; SSE2-NEXT:    retq
129;
130; SSE41-LABEL: max_gt_v4i64:
131; SSE41:       # %bb.0:
132; SSE41-NEXT:    movdqa %xmm0, %xmm4
133; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
134; SSE41-NEXT:    movdqa %xmm2, %xmm0
135; SSE41-NEXT:    pxor %xmm5, %xmm0
136; SSE41-NEXT:    movdqa %xmm4, %xmm6
137; SSE41-NEXT:    pxor %xmm5, %xmm6
138; SSE41-NEXT:    movdqa %xmm6, %xmm7
139; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
140; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
141; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
142; SSE41-NEXT:    pand %xmm7, %xmm0
143; SSE41-NEXT:    por %xmm6, %xmm0
144; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
145; SSE41-NEXT:    movdqa %xmm3, %xmm0
146; SSE41-NEXT:    pxor %xmm5, %xmm0
147; SSE41-NEXT:    pxor %xmm1, %xmm5
148; SSE41-NEXT:    movdqa %xmm5, %xmm4
149; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
150; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
151; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
152; SSE41-NEXT:    pand %xmm4, %xmm0
153; SSE41-NEXT:    por %xmm5, %xmm0
154; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
155; SSE41-NEXT:    movapd %xmm2, %xmm0
156; SSE41-NEXT:    movapd %xmm3, %xmm1
157; SSE41-NEXT:    retq
158;
159; SSE42-LABEL: max_gt_v4i64:
160; SSE42:       # %bb.0:
161; SSE42-NEXT:    movdqa %xmm0, %xmm4
162; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
163; SSE42-NEXT:    movdqa %xmm2, %xmm6
164; SSE42-NEXT:    pxor %xmm5, %xmm6
165; SSE42-NEXT:    pxor %xmm5, %xmm0
166; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
167; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
168; SSE42-NEXT:    movdqa %xmm3, %xmm0
169; SSE42-NEXT:    pxor %xmm5, %xmm0
170; SSE42-NEXT:    pxor %xmm1, %xmm5
171; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
172; SSE42-NEXT:    movdqa %xmm5, %xmm0
173; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
174; SSE42-NEXT:    movapd %xmm2, %xmm0
175; SSE42-NEXT:    movapd %xmm3, %xmm1
176; SSE42-NEXT:    retq
177;
178; AVX1-LABEL: max_gt_v4i64:
179; AVX1:       # %bb.0:
180; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
181; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
182; AVX1-NEXT:    # xmm3 = mem[0,0]
183; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
184; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
185; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
186; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
187; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
188; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
189; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
190; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
191; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
192; AVX1-NEXT:    retq
193;
194; AVX2-LABEL: max_gt_v4i64:
195; AVX2:       # %bb.0:
196; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
197; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
198; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
199; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
200; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
201; AVX2-NEXT:    retq
202;
203; AVX512-LABEL: max_gt_v4i64:
204; AVX512:       # %bb.0:
205; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
206; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
207; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
208; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
209; AVX512-NEXT:    retq
210  %1 = icmp ugt <4 x i64> %a, %b
211  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
212  ret <4 x i64> %2
213}
214
; max_gt_v4i32 - unsigned maximum of two <4 x i32> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * The SSE2 run has no pmaxud in its expected output; it flips the sign bit
;    of both inputs (2147483648 is 0x80000000), compares with the signed
;    pcmpgtd and selects with pand/pandn/por.
;  * The SSE4.1 and SSE4.2 runs expect a single pmaxud.
;  * All AVX-family runs share one check block expecting a single vpmaxud.
215define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
216; SSE2-LABEL: max_gt_v4i32:
217; SSE2:       # %bb.0:
218; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
219; SSE2-NEXT:    movdqa %xmm1, %xmm3
220; SSE2-NEXT:    pxor %xmm2, %xmm3
221; SSE2-NEXT:    pxor %xmm0, %xmm2
222; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
223; SSE2-NEXT:    pand %xmm2, %xmm0
224; SSE2-NEXT:    pandn %xmm1, %xmm2
225; SSE2-NEXT:    por %xmm2, %xmm0
226; SSE2-NEXT:    retq
227;
228; SSE41-LABEL: max_gt_v4i32:
229; SSE41:       # %bb.0:
230; SSE41-NEXT:    pmaxud %xmm1, %xmm0
231; SSE41-NEXT:    retq
232;
233; SSE42-LABEL: max_gt_v4i32:
234; SSE42:       # %bb.0:
235; SSE42-NEXT:    pmaxud %xmm1, %xmm0
236; SSE42-NEXT:    retq
237;
238; AVX-LABEL: max_gt_v4i32:
239; AVX:       # %bb.0:
240; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
241; AVX-NEXT:    retq
242  %1 = icmp ugt <4 x i32> %a, %b
243  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
244  ret <4 x i32> %2
245}
246
; max_gt_v8i32 - unsigned maximum of two <8 x i32> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * SSE runs treat the value as two xmm pairs (xmm0/xmm2 and xmm1/xmm3).
;    The SSE2 run uses the sign-bit-flip + pcmpgtd + pand/pandn/por
;    emulation per pair; the SSE4.1/SSE4.2 runs use one pmaxud per pair.
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxud and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxud.
247define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
248; SSE2-LABEL: max_gt_v8i32:
249; SSE2:       # %bb.0:
250; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
251; SSE2-NEXT:    movdqa %xmm2, %xmm5
252; SSE2-NEXT:    pxor %xmm4, %xmm5
253; SSE2-NEXT:    movdqa %xmm0, %xmm6
254; SSE2-NEXT:    pxor %xmm4, %xmm6
255; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
256; SSE2-NEXT:    pand %xmm6, %xmm0
257; SSE2-NEXT:    pandn %xmm2, %xmm6
258; SSE2-NEXT:    por %xmm6, %xmm0
259; SSE2-NEXT:    movdqa %xmm3, %xmm2
260; SSE2-NEXT:    pxor %xmm4, %xmm2
261; SSE2-NEXT:    pxor %xmm1, %xmm4
262; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
263; SSE2-NEXT:    pand %xmm4, %xmm1
264; SSE2-NEXT:    pandn %xmm3, %xmm4
265; SSE2-NEXT:    por %xmm4, %xmm1
266; SSE2-NEXT:    retq
267;
268; SSE41-LABEL: max_gt_v8i32:
269; SSE41:       # %bb.0:
270; SSE41-NEXT:    pmaxud %xmm2, %xmm0
271; SSE41-NEXT:    pmaxud %xmm3, %xmm1
272; SSE41-NEXT:    retq
273;
274; SSE42-LABEL: max_gt_v8i32:
275; SSE42:       # %bb.0:
276; SSE42-NEXT:    pmaxud %xmm2, %xmm0
277; SSE42-NEXT:    pmaxud %xmm3, %xmm1
278; SSE42-NEXT:    retq
279;
280; AVX1-LABEL: max_gt_v8i32:
281; AVX1:       # %bb.0:
282; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
283; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
284; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
285; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
286; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
287; AVX1-NEXT:    retq
288;
289; AVX2-LABEL: max_gt_v8i32:
290; AVX2:       # %bb.0:
291; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
292; AVX2-NEXT:    retq
293;
294; AVX512-LABEL: max_gt_v8i32:
295; AVX512:       # %bb.0:
296; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
297; AVX512-NEXT:    retq
298  %1 = icmp ugt <8 x i32> %a, %b
299  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
300  ret <8 x i32> %2
301}
302
; max_gt_v8i16 - unsigned maximum of two <8 x i16> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * The SSE2 run has no pmaxuw in its expected output; it computes
;    %a + usubsat(%b, %a) with psubusw followed by paddw, which yields %b
;    when %b >u %a and %a otherwise.
;  * The SSE4.1 and SSE4.2 runs expect a single pmaxuw.
;  * All AVX-family runs share one check block expecting a single vpmaxuw.
303define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
304; SSE2-LABEL: max_gt_v8i16:
305; SSE2:       # %bb.0:
306; SSE2-NEXT:    psubusw %xmm0, %xmm1
307; SSE2-NEXT:    paddw %xmm1, %xmm0
308; SSE2-NEXT:    retq
309;
310; SSE41-LABEL: max_gt_v8i16:
311; SSE41:       # %bb.0:
312; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
313; SSE41-NEXT:    retq
314;
315; SSE42-LABEL: max_gt_v8i16:
316; SSE42:       # %bb.0:
317; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
318; SSE42-NEXT:    retq
319;
320; AVX-LABEL: max_gt_v8i16:
321; AVX:       # %bb.0:
322; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
323; AVX-NEXT:    retq
324  %1 = icmp ugt <8 x i16> %a, %b
325  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
326  ret <8 x i16> %2
327}
328
; max_gt_v16i16 - unsigned maximum of two <16 x i16> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * SSE runs treat the value as two xmm pairs (xmm0/xmm2 and xmm1/xmm3).
;    The SSE2 run uses psubusw + paddw per pair (%a + usubsat(%b, %a));
;    the SSE4.1/SSE4.2 runs use one pmaxuw per pair.
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxuw and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxuw.
329define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
330; SSE2-LABEL: max_gt_v16i16:
331; SSE2:       # %bb.0:
332; SSE2-NEXT:    psubusw %xmm0, %xmm2
333; SSE2-NEXT:    paddw %xmm2, %xmm0
334; SSE2-NEXT:    psubusw %xmm1, %xmm3
335; SSE2-NEXT:    paddw %xmm3, %xmm1
336; SSE2-NEXT:    retq
337;
338; SSE41-LABEL: max_gt_v16i16:
339; SSE41:       # %bb.0:
340; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
341; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
342; SSE41-NEXT:    retq
343;
344; SSE42-LABEL: max_gt_v16i16:
345; SSE42:       # %bb.0:
346; SSE42-NEXT:    pmaxuw %xmm2, %xmm0
347; SSE42-NEXT:    pmaxuw %xmm3, %xmm1
348; SSE42-NEXT:    retq
349;
350; AVX1-LABEL: max_gt_v16i16:
351; AVX1:       # %bb.0:
352; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
353; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
354; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
355; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
356; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
357; AVX1-NEXT:    retq
358;
359; AVX2-LABEL: max_gt_v16i16:
360; AVX2:       # %bb.0:
361; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
362; AVX2-NEXT:    retq
363;
364; AVX512-LABEL: max_gt_v16i16:
365; AVX512:       # %bb.0:
366; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
367; AVX512-NEXT:    retq
368  %1 = icmp ugt <16 x i16> %a, %b
369  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
370  ret <16 x i16> %2
371}
372
; max_gt_v16i8 - unsigned maximum of two <16 x i8> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below: every SSE run shares one
; check block expecting a single pmaxub, and every AVX-family run shares one
; check block expecting a single vpmaxub.
373define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
374; SSE-LABEL: max_gt_v16i8:
375; SSE:       # %bb.0:
376; SSE-NEXT:    pmaxub %xmm1, %xmm0
377; SSE-NEXT:    retq
378;
379; AVX-LABEL: max_gt_v16i8:
380; AVX:       # %bb.0:
381; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
382; AVX-NEXT:    retq
383  %1 = icmp ugt <16 x i8> %a, %b
384  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
385  ret <16 x i8> %2
386}
387
; max_gt_v32i8 - unsigned maximum of two <32 x i8> vectors, written as
; icmp ugt + select (lane takes %a where %a >u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * Every SSE run shares one check block with one pmaxub per xmm pair
;    (xmm0/xmm2 and xmm1/xmm3).
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxub and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxub.
388define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
389; SSE-LABEL: max_gt_v32i8:
390; SSE:       # %bb.0:
391; SSE-NEXT:    pmaxub %xmm2, %xmm0
392; SSE-NEXT:    pmaxub %xmm3, %xmm1
393; SSE-NEXT:    retq
394;
395; AVX1-LABEL: max_gt_v32i8:
396; AVX1:       # %bb.0:
397; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
398; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
399; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
400; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
401; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
402; AVX1-NEXT:    retq
403;
404; AVX2-LABEL: max_gt_v32i8:
405; AVX2:       # %bb.0:
406; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
407; AVX2-NEXT:    retq
408;
409; AVX512-LABEL: max_gt_v32i8:
410; AVX512:       # %bb.0:
411; AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
412; AVX512-NEXT:    retq
413  %1 = icmp ugt <32 x i8> %a, %b
414  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
415  ret <32 x i8> %2
416}
417
418;
419; Unsigned Maximum (GE)
420;
421
; max_ge_v2i64 - unsigned maximum of two <2 x i64> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * AVX512 runs use a native vpmaxuq on the zmm super-registers of the
;    xmm arguments, followed by vzeroupper.
;  * All other runs emulate the compare as a strict signed greater-than on
;    sign-bit-flipped inputs and select with the resulting mask
;    (pand/pandn/por on the SSE2 run, blendvpd elsewhere).
;  * The SSE2 and SSE4.1 runs build the 64-bit compare from pcmpgtd/pcmpeqd;
;    their constant 9223372039002259456 is 0x8000000080000000.
422define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
423; SSE2-LABEL: max_ge_v2i64:
424; SSE2:       # %bb.0:
425; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
426; SSE2-NEXT:    movdqa %xmm1, %xmm3
427; SSE2-NEXT:    pxor %xmm2, %xmm3
428; SSE2-NEXT:    pxor %xmm0, %xmm2
429; SSE2-NEXT:    movdqa %xmm2, %xmm4
430; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
431; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
432; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
433; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
434; SSE2-NEXT:    pand %xmm5, %xmm2
435; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
436; SSE2-NEXT:    por %xmm2, %xmm3
437; SSE2-NEXT:    pand %xmm3, %xmm0
438; SSE2-NEXT:    pandn %xmm1, %xmm3
439; SSE2-NEXT:    por %xmm3, %xmm0
440; SSE2-NEXT:    retq
441;
442; SSE41-LABEL: max_ge_v2i64:
443; SSE41:       # %bb.0:
444; SSE41-NEXT:    movdqa %xmm0, %xmm2
445; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
446; SSE41-NEXT:    movdqa %xmm1, %xmm0
447; SSE41-NEXT:    pxor %xmm3, %xmm0
448; SSE41-NEXT:    pxor %xmm2, %xmm3
449; SSE41-NEXT:    movdqa %xmm3, %xmm4
450; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
451; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
452; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
453; SSE41-NEXT:    pand %xmm4, %xmm0
454; SSE41-NEXT:    por %xmm3, %xmm0
455; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
456; SSE41-NEXT:    movapd %xmm1, %xmm0
457; SSE41-NEXT:    retq
458;
459; SSE42-LABEL: max_ge_v2i64:
460; SSE42:       # %bb.0:
461; SSE42-NEXT:    movdqa %xmm0, %xmm2
462; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
463; SSE42-NEXT:    movdqa %xmm1, %xmm3
464; SSE42-NEXT:    pxor %xmm0, %xmm3
465; SSE42-NEXT:    pxor %xmm2, %xmm0
466; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
467; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
468; SSE42-NEXT:    movapd %xmm1, %xmm0
469; SSE42-NEXT:    retq
470;
471; AVX1-LABEL: max_ge_v2i64:
472; AVX1:       # %bb.0:
473; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
474; AVX1-NEXT:    # xmm2 = mem[0,0]
475; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
476; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
477; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
478; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
479; AVX1-NEXT:    retq
480;
481; AVX2-LABEL: max_ge_v2i64:
482; AVX2:       # %bb.0:
483; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
484; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
485; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
486; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
487; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
488; AVX2-NEXT:    retq
489;
490; AVX512-LABEL: max_ge_v2i64:
491; AVX512:       # %bb.0:
492; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
493; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
494; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
495; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
496; AVX512-NEXT:    vzeroupper
497; AVX512-NEXT:    retq
498  %1 = icmp uge <2 x i64> %a, %b
499  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
500  ret <2 x i64> %2
501}
502
; max_ge_v4i64 - unsigned maximum of two <4 x i64> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * SSE runs process the 256-bit value as two independent xmm pairs
;    (xmm0/xmm2 and xmm1/xmm3), each using a sign-bit-flip compare and a
;    mask-based select.
;  * The AVX1 run compares each 128-bit half separately with vpcmpgtq, joins
;    the two masks with vinsertf128 and does a single ymm-wide vblendvpd.
;  * The AVX2 run does the XOR / vpcmpgtq / vblendvpd sequence directly on ymm.
;  * AVX512 runs use vpmaxuq on the zmm super-registers of the ymm arguments.
503define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
504; SSE2-LABEL: max_ge_v4i64:
505; SSE2:       # %bb.0:
506; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
507; SSE2-NEXT:    movdqa %xmm2, %xmm5
508; SSE2-NEXT:    pxor %xmm4, %xmm5
509; SSE2-NEXT:    movdqa %xmm0, %xmm6
510; SSE2-NEXT:    pxor %xmm4, %xmm6
511; SSE2-NEXT:    movdqa %xmm6, %xmm7
512; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
513; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
514; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
515; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
516; SSE2-NEXT:    pand %xmm8, %xmm5
517; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
518; SSE2-NEXT:    por %xmm5, %xmm6
519; SSE2-NEXT:    pand %xmm6, %xmm0
520; SSE2-NEXT:    pandn %xmm2, %xmm6
521; SSE2-NEXT:    por %xmm6, %xmm0
522; SSE2-NEXT:    movdqa %xmm3, %xmm2
523; SSE2-NEXT:    pxor %xmm4, %xmm2
524; SSE2-NEXT:    pxor %xmm1, %xmm4
525; SSE2-NEXT:    movdqa %xmm4, %xmm5
526; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
527; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
528; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
529; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
530; SSE2-NEXT:    pand %xmm6, %xmm2
531; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
532; SSE2-NEXT:    por %xmm2, %xmm4
533; SSE2-NEXT:    pand %xmm4, %xmm1
534; SSE2-NEXT:    pandn %xmm3, %xmm4
535; SSE2-NEXT:    por %xmm4, %xmm1
536; SSE2-NEXT:    retq
537;
538; SSE41-LABEL: max_ge_v4i64:
539; SSE41:       # %bb.0:
540; SSE41-NEXT:    movdqa %xmm0, %xmm4
541; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
542; SSE41-NEXT:    movdqa %xmm2, %xmm0
543; SSE41-NEXT:    pxor %xmm5, %xmm0
544; SSE41-NEXT:    movdqa %xmm4, %xmm6
545; SSE41-NEXT:    pxor %xmm5, %xmm6
546; SSE41-NEXT:    movdqa %xmm6, %xmm7
547; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
548; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
549; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
550; SSE41-NEXT:    pand %xmm7, %xmm0
551; SSE41-NEXT:    por %xmm6, %xmm0
552; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
553; SSE41-NEXT:    movdqa %xmm3, %xmm0
554; SSE41-NEXT:    pxor %xmm5, %xmm0
555; SSE41-NEXT:    pxor %xmm1, %xmm5
556; SSE41-NEXT:    movdqa %xmm5, %xmm4
557; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
558; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
559; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
560; SSE41-NEXT:    pand %xmm4, %xmm0
561; SSE41-NEXT:    por %xmm5, %xmm0
562; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
563; SSE41-NEXT:    movapd %xmm2, %xmm0
564; SSE41-NEXT:    movapd %xmm3, %xmm1
565; SSE41-NEXT:    retq
566;
567; SSE42-LABEL: max_ge_v4i64:
568; SSE42:       # %bb.0:
569; SSE42-NEXT:    movdqa %xmm0, %xmm4
570; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
571; SSE42-NEXT:    movdqa %xmm2, %xmm6
572; SSE42-NEXT:    pxor %xmm5, %xmm6
573; SSE42-NEXT:    pxor %xmm5, %xmm0
574; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
575; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
576; SSE42-NEXT:    movdqa %xmm3, %xmm0
577; SSE42-NEXT:    pxor %xmm5, %xmm0
578; SSE42-NEXT:    pxor %xmm1, %xmm5
579; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
580; SSE42-NEXT:    movdqa %xmm5, %xmm0
581; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
582; SSE42-NEXT:    movapd %xmm2, %xmm0
583; SSE42-NEXT:    movapd %xmm3, %xmm1
584; SSE42-NEXT:    retq
585;
586; AVX1-LABEL: max_ge_v4i64:
587; AVX1:       # %bb.0:
588; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
589; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
590; AVX1-NEXT:    # xmm3 = mem[0,0]
591; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
592; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
593; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
594; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
595; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
596; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
597; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
598; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
599; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
600; AVX1-NEXT:    retq
601;
602; AVX2-LABEL: max_ge_v4i64:
603; AVX2:       # %bb.0:
604; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
605; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
606; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
607; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
608; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
609; AVX2-NEXT:    retq
610;
611; AVX512-LABEL: max_ge_v4i64:
612; AVX512:       # %bb.0:
613; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
614; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
615; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
616; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
617; AVX512-NEXT:    retq
618  %1 = icmp uge <4 x i64> %a, %b
619  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
620  ret <4 x i64> %2
621}
622
; max_ge_v4i32 - unsigned maximum of two <4 x i32> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * The SSE2 run flips the sign bit of both inputs (2147483648 is
;    0x80000000), compares with the signed pcmpgtd and selects with
;    pand/pandn/por.
;  * The SSE4.1 and SSE4.2 runs expect a single pmaxud.
;  * All AVX-family runs share one check block expecting a single vpmaxud.
623define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
624; SSE2-LABEL: max_ge_v4i32:
625; SSE2:       # %bb.0:
626; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
627; SSE2-NEXT:    movdqa %xmm1, %xmm3
628; SSE2-NEXT:    pxor %xmm2, %xmm3
629; SSE2-NEXT:    pxor %xmm0, %xmm2
630; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
631; SSE2-NEXT:    pand %xmm2, %xmm0
632; SSE2-NEXT:    pandn %xmm1, %xmm2
633; SSE2-NEXT:    por %xmm2, %xmm0
634; SSE2-NEXT:    retq
635;
636; SSE41-LABEL: max_ge_v4i32:
637; SSE41:       # %bb.0:
638; SSE41-NEXT:    pmaxud %xmm1, %xmm0
639; SSE41-NEXT:    retq
640;
641; SSE42-LABEL: max_ge_v4i32:
642; SSE42:       # %bb.0:
643; SSE42-NEXT:    pmaxud %xmm1, %xmm0
644; SSE42-NEXT:    retq
645;
646; AVX-LABEL: max_ge_v4i32:
647; AVX:       # %bb.0:
648; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
649; AVX-NEXT:    retq
650  %1 = icmp uge <4 x i32> %a, %b
651  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
652  ret <4 x i32> %2
653}
654
; max_ge_v8i32 - unsigned maximum of two <8 x i32> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * SSE runs treat the value as two xmm pairs (xmm0/xmm2 and xmm1/xmm3).
;    The SSE2 run uses the sign-bit-flip + pcmpgtd + pand/pandn/por
;    emulation per pair; the SSE4.1/SSE4.2 runs use one pmaxud per pair.
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxud and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxud.
655define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
656; SSE2-LABEL: max_ge_v8i32:
657; SSE2:       # %bb.0:
658; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
659; SSE2-NEXT:    movdqa %xmm2, %xmm5
660; SSE2-NEXT:    pxor %xmm4, %xmm5
661; SSE2-NEXT:    movdqa %xmm0, %xmm6
662; SSE2-NEXT:    pxor %xmm4, %xmm6
663; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
664; SSE2-NEXT:    pand %xmm6, %xmm0
665; SSE2-NEXT:    pandn %xmm2, %xmm6
666; SSE2-NEXT:    por %xmm6, %xmm0
667; SSE2-NEXT:    movdqa %xmm3, %xmm2
668; SSE2-NEXT:    pxor %xmm4, %xmm2
669; SSE2-NEXT:    pxor %xmm1, %xmm4
670; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
671; SSE2-NEXT:    pand %xmm4, %xmm1
672; SSE2-NEXT:    pandn %xmm3, %xmm4
673; SSE2-NEXT:    por %xmm4, %xmm1
674; SSE2-NEXT:    retq
675;
676; SSE41-LABEL: max_ge_v8i32:
677; SSE41:       # %bb.0:
678; SSE41-NEXT:    pmaxud %xmm2, %xmm0
679; SSE41-NEXT:    pmaxud %xmm3, %xmm1
680; SSE41-NEXT:    retq
681;
682; SSE42-LABEL: max_ge_v8i32:
683; SSE42:       # %bb.0:
684; SSE42-NEXT:    pmaxud %xmm2, %xmm0
685; SSE42-NEXT:    pmaxud %xmm3, %xmm1
686; SSE42-NEXT:    retq
687;
688; AVX1-LABEL: max_ge_v8i32:
689; AVX1:       # %bb.0:
690; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
691; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
692; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
693; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
694; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
695; AVX1-NEXT:    retq
696;
697; AVX2-LABEL: max_ge_v8i32:
698; AVX2:       # %bb.0:
699; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
700; AVX2-NEXT:    retq
701;
702; AVX512-LABEL: max_ge_v8i32:
703; AVX512:       # %bb.0:
704; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
705; AVX512-NEXT:    retq
706  %1 = icmp uge <8 x i32> %a, %b
707  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
708  ret <8 x i32> %2
709}
710
; max_ge_v8i16 - unsigned maximum of two <8 x i16> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * The SSE2 run computes %a + usubsat(%b, %a) with psubusw followed by
;    paddw, which yields %b when %b >u %a and %a otherwise.
;  * The SSE4.1 and SSE4.2 runs expect a single pmaxuw.
;  * All AVX-family runs share one check block expecting a single vpmaxuw.
711define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
712; SSE2-LABEL: max_ge_v8i16:
713; SSE2:       # %bb.0:
714; SSE2-NEXT:    psubusw %xmm0, %xmm1
715; SSE2-NEXT:    paddw %xmm1, %xmm0
716; SSE2-NEXT:    retq
717;
718; SSE41-LABEL: max_ge_v8i16:
719; SSE41:       # %bb.0:
720; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
721; SSE41-NEXT:    retq
722;
723; SSE42-LABEL: max_ge_v8i16:
724; SSE42:       # %bb.0:
725; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
726; SSE42-NEXT:    retq
727;
728; AVX-LABEL: max_ge_v8i16:
729; AVX:       # %bb.0:
730; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
731; AVX-NEXT:    retq
732  %1 = icmp uge <8 x i16> %a, %b
733  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
734  ret <8 x i16> %2
735}
736
; max_ge_v16i16 - unsigned maximum of two <16 x i16> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * SSE runs treat the value as two xmm pairs (xmm0/xmm2 and xmm1/xmm3).
;    The SSE2 run uses psubusw + paddw per pair (%a + usubsat(%b, %a));
;    the SSE4.1/SSE4.2 runs use one pmaxuw per pair.
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxuw and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxuw.
737define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
738; SSE2-LABEL: max_ge_v16i16:
739; SSE2:       # %bb.0:
740; SSE2-NEXT:    psubusw %xmm0, %xmm2
741; SSE2-NEXT:    paddw %xmm2, %xmm0
742; SSE2-NEXT:    psubusw %xmm1, %xmm3
743; SSE2-NEXT:    paddw %xmm3, %xmm1
744; SSE2-NEXT:    retq
745;
746; SSE41-LABEL: max_ge_v16i16:
747; SSE41:       # %bb.0:
748; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
749; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
750; SSE41-NEXT:    retq
751;
752; SSE42-LABEL: max_ge_v16i16:
753; SSE42:       # %bb.0:
754; SSE42-NEXT:    pmaxuw %xmm2, %xmm0
755; SSE42-NEXT:    pmaxuw %xmm3, %xmm1
756; SSE42-NEXT:    retq
757;
758; AVX1-LABEL: max_ge_v16i16:
759; AVX1:       # %bb.0:
760; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
761; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
762; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
763; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
764; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
765; AVX1-NEXT:    retq
766;
767; AVX2-LABEL: max_ge_v16i16:
768; AVX2:       # %bb.0:
769; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
770; AVX2-NEXT:    retq
771;
772; AVX512-LABEL: max_ge_v16i16:
773; AVX512:       # %bb.0:
774; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
775; AVX512-NEXT:    retq
776  %1 = icmp uge <16 x i16> %a, %b
777  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
778  ret <16 x i16> %2
779}
780
; max_ge_v16i8 - unsigned maximum of two <16 x i8> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below: every SSE run shares one
; check block expecting a single pmaxub, and every AVX-family run shares one
; check block expecting a single vpmaxub.
781define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
782; SSE-LABEL: max_ge_v16i8:
783; SSE:       # %bb.0:
784; SSE-NEXT:    pmaxub %xmm1, %xmm0
785; SSE-NEXT:    retq
786;
787; AVX-LABEL: max_ge_v16i8:
788; AVX:       # %bb.0:
789; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
790; AVX-NEXT:    retq
791  %1 = icmp uge <16 x i8> %a, %b
792  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
793  ret <16 x i8> %2
794}
795
; max_ge_v32i8 - unsigned maximum of two <32 x i8> vectors, written as
; icmp uge + select (lane takes %a where %a >=u %b, otherwise %b). When the
; lanes are equal both operands hold the same value, so the result is the
; unsigned max regardless of which one is picked.
; Expected lowering, as pinned by the checks below:
;  * Every SSE run shares one check block with one pmaxub per xmm pair
;    (xmm0/xmm2 and xmm1/xmm3).
;  * The AVX1 run extracts the upper 128 bits of each operand, issues two
;    xmm vpmaxub and rebuilds the result with vinsertf128.
;  * The AVX2 and AVX512 runs expect a single ymm vpmaxub.
796define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
797; SSE-LABEL: max_ge_v32i8:
798; SSE:       # %bb.0:
799; SSE-NEXT:    pmaxub %xmm2, %xmm0
800; SSE-NEXT:    pmaxub %xmm3, %xmm1
801; SSE-NEXT:    retq
802;
803; AVX1-LABEL: max_ge_v32i8:
804; AVX1:       # %bb.0:
805; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
806; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
807; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
808; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
809; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
810; AVX1-NEXT:    retq
811;
812; AVX2-LABEL: max_ge_v32i8:
813; AVX2:       # %bb.0:
814; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
815; AVX2-NEXT:    retq
816;
817; AVX512-LABEL: max_ge_v32i8:
818; AVX512:       # %bb.0:
819; AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
820; AVX512-NEXT:    retq
821  %1 = icmp uge <32 x i8> %a, %b
822  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
823  ret <32 x i8> %2
824}
825
826;
827; Unsigned Minimum (LT)
828;
829
; min_lt_v2i64 - unsigned minimum of two <2 x i64> vectors, written as
; icmp ult + select (lane takes %a where %a <u %b, otherwise %b).
; Expected lowering, as pinned by the checks below:
;  * AVX512 runs use a native vpminuq on the zmm super-registers of the
;    xmm arguments, followed by vzeroupper.
;  * All other runs emulate the unsigned compare: both inputs are XORed with
;    a sign-bit constant and the signed greater-than is evaluated as
;    "flipped %b > flipped %a"; that mask selects %a (pand/pandn/por on the
;    SSE2 run, blendvpd elsewhere).
;  * The SSE2 and SSE4.1 runs build the 64-bit compare from pcmpgtd/pcmpeqd;
;    their constant 9223372039002259456 is 0x8000000080000000, i.e. the sign
;    bit of each 32-bit half. SSE4.2/AVX runs use pcmpgtq with
;    0x8000000000000000.
830define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
831; SSE2-LABEL: min_lt_v2i64:
832; SSE2:       # %bb.0:
833; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
834; SSE2-NEXT:    movdqa %xmm0, %xmm3
835; SSE2-NEXT:    pxor %xmm2, %xmm3
836; SSE2-NEXT:    pxor %xmm1, %xmm2
837; SSE2-NEXT:    movdqa %xmm2, %xmm4
838; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
839; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
840; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
841; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
842; SSE2-NEXT:    pand %xmm5, %xmm2
843; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
844; SSE2-NEXT:    por %xmm2, %xmm3
845; SSE2-NEXT:    pand %xmm3, %xmm0
846; SSE2-NEXT:    pandn %xmm1, %xmm3
847; SSE2-NEXT:    por %xmm3, %xmm0
848; SSE2-NEXT:    retq
849;
850; SSE41-LABEL: min_lt_v2i64:
851; SSE41:       # %bb.0:
852; SSE41-NEXT:    movdqa %xmm0, %xmm2
853; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
854; SSE41-NEXT:    pxor %xmm3, %xmm0
855; SSE41-NEXT:    pxor %xmm1, %xmm3
856; SSE41-NEXT:    movdqa %xmm3, %xmm4
857; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
858; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
859; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
860; SSE41-NEXT:    pand %xmm4, %xmm0
861; SSE41-NEXT:    por %xmm3, %xmm0
862; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
863; SSE41-NEXT:    movapd %xmm1, %xmm0
864; SSE41-NEXT:    retq
865;
866; SSE42-LABEL: min_lt_v2i64:
867; SSE42:       # %bb.0:
868; SSE42-NEXT:    movdqa %xmm0, %xmm2
869; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
870; SSE42-NEXT:    movdqa %xmm2, %xmm3
871; SSE42-NEXT:    pxor %xmm0, %xmm3
872; SSE42-NEXT:    pxor %xmm1, %xmm0
873; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
874; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
875; SSE42-NEXT:    movapd %xmm1, %xmm0
876; SSE42-NEXT:    retq
877;
878; AVX1-LABEL: min_lt_v2i64:
879; AVX1:       # %bb.0:
880; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
881; AVX1-NEXT:    # xmm2 = mem[0,0]
882; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
883; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
884; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
885; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
886; AVX1-NEXT:    retq
887;
888; AVX2-LABEL: min_lt_v2i64:
889; AVX2:       # %bb.0:
890; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
891; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
892; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
893; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
894; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
895; AVX2-NEXT:    retq
896;
897; AVX512-LABEL: min_lt_v2i64:
898; AVX512:       # %bb.0:
899; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
900; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
901; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
902; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
903; AVX512-NEXT:    vzeroupper
904; AVX512-NEXT:    retq
905  %1 = icmp ult <2 x i64> %a, %b
906  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
907  ret <2 x i64> %2
908}
909
; min_lt_v4i64: per-lane unsigned minimum of two <4 x i64> vectors, written in
; IR as `icmp ult` followed by `select`. The SSE checks handle the 256-bit
; value as two xmm pairs (xmm0 with xmm2, xmm1 with xmm3). The AVX1 checks
; compare the two 128-bit halves separately, rejoin the masks with vinsertf128
; and blend once on ymm. AVX2 compares and blends on ymm directly, and AVX512
; expects a single vpminuq on the inputs widened to zmm.
910define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
911; SSE2-LABEL: min_lt_v4i64:
912; SSE2:       # %bb.0:
913; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
914; SSE2-NEXT:    movdqa %xmm0, %xmm5
915; SSE2-NEXT:    pxor %xmm4, %xmm5
916; SSE2-NEXT:    movdqa %xmm2, %xmm6
917; SSE2-NEXT:    pxor %xmm4, %xmm6
918; SSE2-NEXT:    movdqa %xmm6, %xmm7
919; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
920; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
921; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
922; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
923; SSE2-NEXT:    pand %xmm8, %xmm5
924; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
925; SSE2-NEXT:    por %xmm5, %xmm6
926; SSE2-NEXT:    pand %xmm6, %xmm0
927; SSE2-NEXT:    pandn %xmm2, %xmm6
928; SSE2-NEXT:    por %xmm6, %xmm0
929; SSE2-NEXT:    movdqa %xmm1, %xmm2
930; SSE2-NEXT:    pxor %xmm4, %xmm2
931; SSE2-NEXT:    pxor %xmm3, %xmm4
932; SSE2-NEXT:    movdqa %xmm4, %xmm5
933; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
934; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
935; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
936; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
937; SSE2-NEXT:    pand %xmm6, %xmm2
938; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
939; SSE2-NEXT:    por %xmm2, %xmm4
940; SSE2-NEXT:    pand %xmm4, %xmm1
941; SSE2-NEXT:    pandn %xmm3, %xmm4
942; SSE2-NEXT:    por %xmm4, %xmm1
943; SSE2-NEXT:    retq
944;
945; SSE41-LABEL: min_lt_v4i64:
946; SSE41:       # %bb.0:
947; SSE41-NEXT:    movdqa %xmm0, %xmm4
948; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
949; SSE41-NEXT:    pxor %xmm5, %xmm0
950; SSE41-NEXT:    movdqa %xmm2, %xmm6
951; SSE41-NEXT:    pxor %xmm5, %xmm6
952; SSE41-NEXT:    movdqa %xmm6, %xmm7
953; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
954; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
955; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
956; SSE41-NEXT:    pand %xmm7, %xmm0
957; SSE41-NEXT:    por %xmm6, %xmm0
958; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
959; SSE41-NEXT:    movdqa %xmm1, %xmm0
960; SSE41-NEXT:    pxor %xmm5, %xmm0
961; SSE41-NEXT:    pxor %xmm3, %xmm5
962; SSE41-NEXT:    movdqa %xmm5, %xmm4
963; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
964; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
965; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
966; SSE41-NEXT:    pand %xmm4, %xmm0
967; SSE41-NEXT:    por %xmm5, %xmm0
968; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
969; SSE41-NEXT:    movapd %xmm2, %xmm0
970; SSE41-NEXT:    movapd %xmm3, %xmm1
971; SSE41-NEXT:    retq
972;
973; SSE42-LABEL: min_lt_v4i64:
974; SSE42:       # %bb.0:
975; SSE42-NEXT:    movdqa %xmm0, %xmm4
976; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
977; SSE42-NEXT:    movdqa %xmm0, %xmm6
978; SSE42-NEXT:    pxor %xmm5, %xmm6
979; SSE42-NEXT:    movdqa %xmm2, %xmm0
980; SSE42-NEXT:    pxor %xmm5, %xmm0
981; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
982; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
983; SSE42-NEXT:    movdqa %xmm1, %xmm0
984; SSE42-NEXT:    pxor %xmm5, %xmm0
985; SSE42-NEXT:    pxor %xmm3, %xmm5
986; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
987; SSE42-NEXT:    movdqa %xmm5, %xmm0
988; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
989; SSE42-NEXT:    movapd %xmm2, %xmm0
990; SSE42-NEXT:    movapd %xmm3, %xmm1
991; SSE42-NEXT:    retq
992;
993; AVX1-LABEL: min_lt_v4i64:
994; AVX1:       # %bb.0:
995; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
996; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
997; AVX1-NEXT:    # xmm3 = mem[0,0]
998; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
999; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1000; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
1001; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
1002; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
1003; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
1004; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1005; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
1006; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1007; AVX1-NEXT:    retq
1008;
1009; AVX2-LABEL: min_lt_v4i64:
1010; AVX2:       # %bb.0:
1011; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1012; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
1013; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
1014; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
1015; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1016; AVX2-NEXT:    retq
1017;
1018; AVX512-LABEL: min_lt_v4i64:
1019; AVX512:       # %bb.0:
1020; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1021; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1022; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
1023; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1024; AVX512-NEXT:    retq
1025  %1 = icmp ult <4 x i64> %a, %b
1026  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
1027  ret <4 x i64> %2
1028}
1029
; min_lt_v4i32: per-lane unsigned minimum of two <4 x i32> vectors, written in
; IR as `icmp ult` followed by `select`. The SSE2 checks expect both inputs to
; be XORed with 2147483648 per lane, compared with pcmpgtd, and merged with
; pand/pandn/por. The SSE4.1, SSE4.2 and AVX checks expect a single
; pminud/vpminud.
1030define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
1031; SSE2-LABEL: min_lt_v4i32:
1032; SSE2:       # %bb.0:
1033; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1034; SSE2-NEXT:    movdqa %xmm0, %xmm3
1035; SSE2-NEXT:    pxor %xmm2, %xmm3
1036; SSE2-NEXT:    pxor %xmm1, %xmm2
1037; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
1038; SSE2-NEXT:    pand %xmm2, %xmm0
1039; SSE2-NEXT:    pandn %xmm1, %xmm2
1040; SSE2-NEXT:    por %xmm2, %xmm0
1041; SSE2-NEXT:    retq
1042;
1043; SSE41-LABEL: min_lt_v4i32:
1044; SSE41:       # %bb.0:
1045; SSE41-NEXT:    pminud %xmm1, %xmm0
1046; SSE41-NEXT:    retq
1047;
1048; SSE42-LABEL: min_lt_v4i32:
1049; SSE42:       # %bb.0:
1050; SSE42-NEXT:    pminud %xmm1, %xmm0
1051; SSE42-NEXT:    retq
1052;
1053; AVX-LABEL: min_lt_v4i32:
1054; AVX:       # %bb.0:
1055; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1056; AVX-NEXT:    retq
1057  %1 = icmp ult <4 x i32> %a, %b
1058  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
1059  ret <4 x i32> %2
1060}
1061
; min_lt_v8i32: per-lane unsigned minimum of two <8 x i32> vectors, written in
; IR as `icmp ult` followed by `select`. The SSE2 checks repeat the
; pxor/pcmpgtd/pand/pandn/por sequence for each xmm pair. SSE4.1/SSE4.2 expect
; one pminud per xmm pair. AVX1 splits the ymm inputs with vextractf128, uses
; two xmm vpminud and rejoins with vinsertf128. AVX2 and AVX512 expect a single
; ymm vpminud.
1062define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
1063; SSE2-LABEL: min_lt_v8i32:
1064; SSE2:       # %bb.0:
1065; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1066; SSE2-NEXT:    movdqa %xmm0, %xmm5
1067; SSE2-NEXT:    pxor %xmm4, %xmm5
1068; SSE2-NEXT:    movdqa %xmm2, %xmm6
1069; SSE2-NEXT:    pxor %xmm4, %xmm6
1070; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1071; SSE2-NEXT:    pand %xmm6, %xmm0
1072; SSE2-NEXT:    pandn %xmm2, %xmm6
1073; SSE2-NEXT:    por %xmm6, %xmm0
1074; SSE2-NEXT:    movdqa %xmm1, %xmm2
1075; SSE2-NEXT:    pxor %xmm4, %xmm2
1076; SSE2-NEXT:    pxor %xmm3, %xmm4
1077; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
1078; SSE2-NEXT:    pand %xmm4, %xmm1
1079; SSE2-NEXT:    pandn %xmm3, %xmm4
1080; SSE2-NEXT:    por %xmm4, %xmm1
1081; SSE2-NEXT:    retq
1082;
1083; SSE41-LABEL: min_lt_v8i32:
1084; SSE41:       # %bb.0:
1085; SSE41-NEXT:    pminud %xmm2, %xmm0
1086; SSE41-NEXT:    pminud %xmm3, %xmm1
1087; SSE41-NEXT:    retq
1088;
1089; SSE42-LABEL: min_lt_v8i32:
1090; SSE42:       # %bb.0:
1091; SSE42-NEXT:    pminud %xmm2, %xmm0
1092; SSE42-NEXT:    pminud %xmm3, %xmm1
1093; SSE42-NEXT:    retq
1094;
1095; AVX1-LABEL: min_lt_v8i32:
1096; AVX1:       # %bb.0:
1097; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1098; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1099; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
1100; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1101; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1102; AVX1-NEXT:    retq
1103;
1104; AVX2-LABEL: min_lt_v8i32:
1105; AVX2:       # %bb.0:
1106; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
1107; AVX2-NEXT:    retq
1108;
1109; AVX512-LABEL: min_lt_v8i32:
1110; AVX512:       # %bb.0:
1111; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
1112; AVX512-NEXT:    retq
1113  %1 = icmp ult <8 x i32> %a, %b
1114  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
1115  ret <8 x i32> %2
1116}
1117
; min_lt_v8i16: per-lane unsigned minimum of two <8 x i16> vectors, written in
; IR as `icmp ult` followed by `select`. The SSE2 checks expect a psubusw of a
; copy of %a by %b followed by a psubw of that result from %a. The SSE4.1,
; SSE4.2 and AVX checks expect a single pminuw/vpminuw.
1118define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1119; SSE2-LABEL: min_lt_v8i16:
1120; SSE2:       # %bb.0:
1121; SSE2-NEXT:    movdqa %xmm0, %xmm2
1122; SSE2-NEXT:    psubusw %xmm1, %xmm2
1123; SSE2-NEXT:    psubw %xmm2, %xmm0
1124; SSE2-NEXT:    retq
1125;
1126; SSE41-LABEL: min_lt_v8i16:
1127; SSE41:       # %bb.0:
1128; SSE41-NEXT:    pminuw %xmm1, %xmm0
1129; SSE41-NEXT:    retq
1130;
1131; SSE42-LABEL: min_lt_v8i16:
1132; SSE42:       # %bb.0:
1133; SSE42-NEXT:    pminuw %xmm1, %xmm0
1134; SSE42-NEXT:    retq
1135;
1136; AVX-LABEL: min_lt_v8i16:
1137; AVX:       # %bb.0:
1138; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1139; AVX-NEXT:    retq
1140  %1 = icmp ult <8 x i16> %a, %b
1141  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
1142  ret <8 x i16> %2
1143}
1144
; min_lt_v16i16: per-lane unsigned minimum of two <16 x i16> vectors, written
; in IR as `icmp ult` followed by `select`. The SSE2 checks expect the
; psubusw + psubw sequence once per xmm pair. SSE4.1/SSE4.2 expect one pminuw
; per xmm pair. AVX1 splits with vextractf128, uses two xmm vpminuw and rejoins
; with vinsertf128. AVX2 and AVX512 expect a single ymm vpminuw.
1145define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1146; SSE2-LABEL: min_lt_v16i16:
1147; SSE2:       # %bb.0:
1148; SSE2-NEXT:    movdqa %xmm0, %xmm4
1149; SSE2-NEXT:    psubusw %xmm2, %xmm4
1150; SSE2-NEXT:    psubw %xmm4, %xmm0
1151; SSE2-NEXT:    movdqa %xmm1, %xmm2
1152; SSE2-NEXT:    psubusw %xmm3, %xmm2
1153; SSE2-NEXT:    psubw %xmm2, %xmm1
1154; SSE2-NEXT:    retq
1155;
1156; SSE41-LABEL: min_lt_v16i16:
1157; SSE41:       # %bb.0:
1158; SSE41-NEXT:    pminuw %xmm2, %xmm0
1159; SSE41-NEXT:    pminuw %xmm3, %xmm1
1160; SSE41-NEXT:    retq
1161;
1162; SSE42-LABEL: min_lt_v16i16:
1163; SSE42:       # %bb.0:
1164; SSE42-NEXT:    pminuw %xmm2, %xmm0
1165; SSE42-NEXT:    pminuw %xmm3, %xmm1
1166; SSE42-NEXT:    retq
1167;
1168; AVX1-LABEL: min_lt_v16i16:
1169; AVX1:       # %bb.0:
1170; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1171; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1172; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
1173; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1174; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1175; AVX1-NEXT:    retq
1176;
1177; AVX2-LABEL: min_lt_v16i16:
1178; AVX2:       # %bb.0:
1179; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
1180; AVX2-NEXT:    retq
1181;
1182; AVX512-LABEL: min_lt_v16i16:
1183; AVX512:       # %bb.0:
1184; AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
1185; AVX512-NEXT:    retq
1186  %1 = icmp ult <16 x i16> %a, %b
1187  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
1188  ret <16 x i16> %2
1189}
1190
; min_lt_v16i8: per-lane unsigned minimum of two <16 x i8> vectors, written in
; IR as `icmp ult` followed by `select`. Every SSE run line shares one check
; (a single pminub) and every AVX run line shares one check (a single vpminub).
1191define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1192; SSE-LABEL: min_lt_v16i8:
1193; SSE:       # %bb.0:
1194; SSE-NEXT:    pminub %xmm1, %xmm0
1195; SSE-NEXT:    retq
1196;
1197; AVX-LABEL: min_lt_v16i8:
1198; AVX:       # %bb.0:
1199; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1200; AVX-NEXT:    retq
1201  %1 = icmp ult <16 x i8> %a, %b
1202  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
1203  ret <16 x i8> %2
1204}
1205
; min_lt_v32i8: per-lane unsigned minimum of two <32 x i8> vectors, written in
; IR as `icmp ult` followed by `select`. The SSE checks expect one pminub per
; xmm pair. AVX1 splits with vextractf128, uses two xmm vpminub and rejoins
; with vinsertf128. AVX2 and AVX512 expect a single ymm vpminub.
1206define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1207; SSE-LABEL: min_lt_v32i8:
1208; SSE:       # %bb.0:
1209; SSE-NEXT:    pminub %xmm2, %xmm0
1210; SSE-NEXT:    pminub %xmm3, %xmm1
1211; SSE-NEXT:    retq
1212;
1213; AVX1-LABEL: min_lt_v32i8:
1214; AVX1:       # %bb.0:
1215; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1216; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1217; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
1218; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1219; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1220; AVX1-NEXT:    retq
1221;
1222; AVX2-LABEL: min_lt_v32i8:
1223; AVX2:       # %bb.0:
1224; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
1225; AVX2-NEXT:    retq
1226;
1227; AVX512-LABEL: min_lt_v32i8:
1228; AVX512:       # %bb.0:
1229; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
1230; AVX512-NEXT:    retq
1231  %1 = icmp ult <32 x i8> %a, %b
1232  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1233  ret <32 x i8> %2
1234}
1235
1236;
1237; Unsigned Minimum (LE)
1238;
1239
; min_le_v2i64: per-lane unsigned minimum of two <2 x i64> vectors, written in
; IR as `icmp ule` followed by `select`. The AVX512 checks expect a single
; vpminuq on the inputs widened to zmm. The SSE/AVX1/AVX2 checks instead expect
; both inputs to be XORed with a constant, compared with pcmpgtd/pcmpgtq, and
; the inputs then merged according to the resulting mask.
1240define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1241; SSE2-LABEL: min_le_v2i64:
1242; SSE2:       # %bb.0:
1243; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
1244; SSE2-NEXT:    movdqa %xmm0, %xmm3
1245; SSE2-NEXT:    pxor %xmm2, %xmm3
1246; SSE2-NEXT:    pxor %xmm1, %xmm2
1247; SSE2-NEXT:    movdqa %xmm2, %xmm4
1248; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
1249; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1250; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
1251; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1252; SSE2-NEXT:    pand %xmm5, %xmm2
1253; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1254; SSE2-NEXT:    por %xmm2, %xmm3
1255; SSE2-NEXT:    pand %xmm3, %xmm0
1256; SSE2-NEXT:    pandn %xmm1, %xmm3
1257; SSE2-NEXT:    por %xmm3, %xmm0
1258; SSE2-NEXT:    retq
1259;
1260; SSE41-LABEL: min_le_v2i64:
1261; SSE41:       # %bb.0:
1262; SSE41-NEXT:    movdqa %xmm0, %xmm2
1263; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
1264; SSE41-NEXT:    pxor %xmm3, %xmm0
1265; SSE41-NEXT:    pxor %xmm1, %xmm3
1266; SSE41-NEXT:    movdqa %xmm3, %xmm4
1267; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
1268; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
1269; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1270; SSE41-NEXT:    pand %xmm4, %xmm0
1271; SSE41-NEXT:    por %xmm3, %xmm0
1272; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1273; SSE41-NEXT:    movapd %xmm1, %xmm0
1274; SSE41-NEXT:    retq
1275;
1276; SSE42-LABEL: min_le_v2i64:
1277; SSE42:       # %bb.0:
1278; SSE42-NEXT:    movdqa %xmm0, %xmm2
1279; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
1280; SSE42-NEXT:    movdqa %xmm2, %xmm3
1281; SSE42-NEXT:    pxor %xmm0, %xmm3
1282; SSE42-NEXT:    pxor %xmm1, %xmm0
1283; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
1284; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1285; SSE42-NEXT:    movapd %xmm1, %xmm0
1286; SSE42-NEXT:    retq
1287;
1288; AVX1-LABEL: min_le_v2i64:
1289; AVX1:       # %bb.0:
1290; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1291; AVX1-NEXT:    # xmm2 = mem[0,0]
1292; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
1293; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
1294; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
1295; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1296; AVX1-NEXT:    retq
1297;
1298; AVX2-LABEL: min_le_v2i64:
1299; AVX2:       # %bb.0:
1300; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1301; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
1302; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
1303; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
1304; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1305; AVX2-NEXT:    retq
1306;
1307; AVX512-LABEL: min_le_v2i64:
1308; AVX512:       # %bb.0:
1309; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1310; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1311; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
1312; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1313; AVX512-NEXT:    vzeroupper
1314; AVX512-NEXT:    retq
1315  %1 = icmp ule <2 x i64> %a, %b
1316  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
1317  ret <2 x i64> %2
1318}
1319
; min_le_v4i64: per-lane unsigned minimum of two <4 x i64> vectors, written in
; IR as `icmp ule` followed by `select`. The SSE checks handle the 256-bit
; value as two xmm pairs (xmm0 with xmm2, xmm1 with xmm3). The AVX1 checks
; compare the two 128-bit halves separately, rejoin the masks with vinsertf128
; and blend once on ymm. AVX2 compares and blends on ymm directly, and AVX512
; expects a single vpminuq on the inputs widened to zmm.
1320define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1321; SSE2-LABEL: min_le_v4i64:
1322; SSE2:       # %bb.0:
1323; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
1324; SSE2-NEXT:    movdqa %xmm0, %xmm5
1325; SSE2-NEXT:    pxor %xmm4, %xmm5
1326; SSE2-NEXT:    movdqa %xmm2, %xmm6
1327; SSE2-NEXT:    pxor %xmm4, %xmm6
1328; SSE2-NEXT:    movdqa %xmm6, %xmm7
1329; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1330; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1331; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
1332; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1333; SSE2-NEXT:    pand %xmm8, %xmm5
1334; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1335; SSE2-NEXT:    por %xmm5, %xmm6
1336; SSE2-NEXT:    pand %xmm6, %xmm0
1337; SSE2-NEXT:    pandn %xmm2, %xmm6
1338; SSE2-NEXT:    por %xmm6, %xmm0
1339; SSE2-NEXT:    movdqa %xmm1, %xmm2
1340; SSE2-NEXT:    pxor %xmm4, %xmm2
1341; SSE2-NEXT:    pxor %xmm3, %xmm4
1342; SSE2-NEXT:    movdqa %xmm4, %xmm5
1343; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
1344; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1345; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
1346; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1347; SSE2-NEXT:    pand %xmm6, %xmm2
1348; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1349; SSE2-NEXT:    por %xmm2, %xmm4
1350; SSE2-NEXT:    pand %xmm4, %xmm1
1351; SSE2-NEXT:    pandn %xmm3, %xmm4
1352; SSE2-NEXT:    por %xmm4, %xmm1
1353; SSE2-NEXT:    retq
1354;
1355; SSE41-LABEL: min_le_v4i64:
1356; SSE41:       # %bb.0:
1357; SSE41-NEXT:    movdqa %xmm0, %xmm4
1358; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
1359; SSE41-NEXT:    pxor %xmm5, %xmm0
1360; SSE41-NEXT:    movdqa %xmm2, %xmm6
1361; SSE41-NEXT:    pxor %xmm5, %xmm6
1362; SSE41-NEXT:    movdqa %xmm6, %xmm7
1363; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
1364; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1365; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1366; SSE41-NEXT:    pand %xmm7, %xmm0
1367; SSE41-NEXT:    por %xmm6, %xmm0
1368; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1369; SSE41-NEXT:    movdqa %xmm1, %xmm0
1370; SSE41-NEXT:    pxor %xmm5, %xmm0
1371; SSE41-NEXT:    pxor %xmm3, %xmm5
1372; SSE41-NEXT:    movdqa %xmm5, %xmm4
1373; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
1374; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
1375; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
1376; SSE41-NEXT:    pand %xmm4, %xmm0
1377; SSE41-NEXT:    por %xmm5, %xmm0
1378; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1379; SSE41-NEXT:    movapd %xmm2, %xmm0
1380; SSE41-NEXT:    movapd %xmm3, %xmm1
1381; SSE41-NEXT:    retq
1382;
1383; SSE42-LABEL: min_le_v4i64:
1384; SSE42:       # %bb.0:
1385; SSE42-NEXT:    movdqa %xmm0, %xmm4
1386; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
1387; SSE42-NEXT:    movdqa %xmm0, %xmm6
1388; SSE42-NEXT:    pxor %xmm5, %xmm6
1389; SSE42-NEXT:    movdqa %xmm2, %xmm0
1390; SSE42-NEXT:    pxor %xmm5, %xmm0
1391; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
1392; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1393; SSE42-NEXT:    movdqa %xmm1, %xmm0
1394; SSE42-NEXT:    pxor %xmm5, %xmm0
1395; SSE42-NEXT:    pxor %xmm3, %xmm5
1396; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
1397; SSE42-NEXT:    movdqa %xmm5, %xmm0
1398; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1399; SSE42-NEXT:    movapd %xmm2, %xmm0
1400; SSE42-NEXT:    movapd %xmm3, %xmm1
1401; SSE42-NEXT:    retq
1402;
1403; AVX1-LABEL: min_le_v4i64:
1404; AVX1:       # %bb.0:
1405; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1406; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1407; AVX1-NEXT:    # xmm3 = mem[0,0]
1408; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
1409; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1410; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
1411; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
1412; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
1413; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
1414; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
1415; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
1416; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1417; AVX1-NEXT:    retq
1418;
1419; AVX2-LABEL: min_le_v4i64:
1420; AVX2:       # %bb.0:
1421; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1422; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
1423; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
1424; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
1425; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1426; AVX2-NEXT:    retq
1427;
1428; AVX512-LABEL: min_le_v4i64:
1429; AVX512:       # %bb.0:
1430; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1431; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1432; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
1433; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1434; AVX512-NEXT:    retq
1435  %1 = icmp ule <4 x i64> %a, %b
1436  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
1437  ret <4 x i64> %2
1438}
1439
; min_le_v4i32: per-lane unsigned minimum of two <4 x i32> vectors, written in
; IR as `icmp ule` followed by `select`. The SSE2 checks expect both inputs to
; be XORed with 2147483648 per lane, compared with pcmpgtd, and merged with
; pand/pandn/por. The SSE4.1, SSE4.2 and AVX checks expect a single
; pminud/vpminud.
1440define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1441; SSE2-LABEL: min_le_v4i32:
1442; SSE2:       # %bb.0:
1443; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1444; SSE2-NEXT:    movdqa %xmm0, %xmm3
1445; SSE2-NEXT:    pxor %xmm2, %xmm3
1446; SSE2-NEXT:    pxor %xmm1, %xmm2
1447; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
1448; SSE2-NEXT:    pand %xmm2, %xmm0
1449; SSE2-NEXT:    pandn %xmm1, %xmm2
1450; SSE2-NEXT:    por %xmm2, %xmm0
1451; SSE2-NEXT:    retq
1452;
1453; SSE41-LABEL: min_le_v4i32:
1454; SSE41:       # %bb.0:
1455; SSE41-NEXT:    pminud %xmm1, %xmm0
1456; SSE41-NEXT:    retq
1457;
1458; SSE42-LABEL: min_le_v4i32:
1459; SSE42:       # %bb.0:
1460; SSE42-NEXT:    pminud %xmm1, %xmm0
1461; SSE42-NEXT:    retq
1462;
1463; AVX-LABEL: min_le_v4i32:
1464; AVX:       # %bb.0:
1465; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1466; AVX-NEXT:    retq
1467  %1 = icmp ule <4 x i32> %a, %b
1468  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
1469  ret <4 x i32> %2
1470}
1471
; min_le_v8i32: per-lane unsigned minimum of two <8 x i32> vectors, written in
; IR as `icmp ule` followed by `select`. The SSE2 checks repeat the
; pxor/pcmpgtd/pand/pandn/por sequence for each xmm pair. SSE4.1/SSE4.2 expect
; one pminud per xmm pair. AVX1 splits the ymm inputs with vextractf128, uses
; two xmm vpminud and rejoins with vinsertf128. AVX2 and AVX512 expect a single
; ymm vpminud.
1472define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1473; SSE2-LABEL: min_le_v8i32:
1474; SSE2:       # %bb.0:
1475; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1476; SSE2-NEXT:    movdqa %xmm0, %xmm5
1477; SSE2-NEXT:    pxor %xmm4, %xmm5
1478; SSE2-NEXT:    movdqa %xmm2, %xmm6
1479; SSE2-NEXT:    pxor %xmm4, %xmm6
1480; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1481; SSE2-NEXT:    pand %xmm6, %xmm0
1482; SSE2-NEXT:    pandn %xmm2, %xmm6
1483; SSE2-NEXT:    por %xmm6, %xmm0
1484; SSE2-NEXT:    movdqa %xmm1, %xmm2
1485; SSE2-NEXT:    pxor %xmm4, %xmm2
1486; SSE2-NEXT:    pxor %xmm3, %xmm4
1487; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
1488; SSE2-NEXT:    pand %xmm4, %xmm1
1489; SSE2-NEXT:    pandn %xmm3, %xmm4
1490; SSE2-NEXT:    por %xmm4, %xmm1
1491; SSE2-NEXT:    retq
1492;
1493; SSE41-LABEL: min_le_v8i32:
1494; SSE41:       # %bb.0:
1495; SSE41-NEXT:    pminud %xmm2, %xmm0
1496; SSE41-NEXT:    pminud %xmm3, %xmm1
1497; SSE41-NEXT:    retq
1498;
1499; SSE42-LABEL: min_le_v8i32:
1500; SSE42:       # %bb.0:
1501; SSE42-NEXT:    pminud %xmm2, %xmm0
1502; SSE42-NEXT:    pminud %xmm3, %xmm1
1503; SSE42-NEXT:    retq
1504;
1505; AVX1-LABEL: min_le_v8i32:
1506; AVX1:       # %bb.0:
1507; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1508; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1509; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
1510; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1511; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1512; AVX1-NEXT:    retq
1513;
1514; AVX2-LABEL: min_le_v8i32:
1515; AVX2:       # %bb.0:
1516; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
1517; AVX2-NEXT:    retq
1518;
1519; AVX512-LABEL: min_le_v8i32:
1520; AVX512:       # %bb.0:
1521; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
1522; AVX512-NEXT:    retq
1523  %1 = icmp ule <8 x i32> %a, %b
1524  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
1525  ret <8 x i32> %2
1526}
1527
; min_le_v8i16: per-lane unsigned minimum of two <8 x i16> vectors, written in
; IR as `icmp ule` followed by `select`. The SSE2 checks expect a psubusw of a
; copy of %a by %b followed by a psubw of that result from %a. The SSE4.1,
; SSE4.2 and AVX checks expect a single pminuw/vpminuw.
1528define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1529; SSE2-LABEL: min_le_v8i16:
1530; SSE2:       # %bb.0:
1531; SSE2-NEXT:    movdqa %xmm0, %xmm2
1532; SSE2-NEXT:    psubusw %xmm1, %xmm2
1533; SSE2-NEXT:    psubw %xmm2, %xmm0
1534; SSE2-NEXT:    retq
1535;
1536; SSE41-LABEL: min_le_v8i16:
1537; SSE41:       # %bb.0:
1538; SSE41-NEXT:    pminuw %xmm1, %xmm0
1539; SSE41-NEXT:    retq
1540;
1541; SSE42-LABEL: min_le_v8i16:
1542; SSE42:       # %bb.0:
1543; SSE42-NEXT:    pminuw %xmm1, %xmm0
1544; SSE42-NEXT:    retq
1545;
1546; AVX-LABEL: min_le_v8i16:
1547; AVX:       # %bb.0:
1548; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1549; AVX-NEXT:    retq
1550  %1 = icmp ule <8 x i16> %a, %b
1551  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
1552  ret <8 x i16> %2
1553}
1554
; min_le_v16i16: per-lane unsigned minimum of two <16 x i16> vectors, written
; in IR as `icmp ule` followed by `select`. The SSE2 checks expect the
; psubusw + psubw sequence once per xmm pair. SSE4.1/SSE4.2 expect one pminuw
; per xmm pair. AVX1 splits with vextractf128, uses two xmm vpminuw and rejoins
; with vinsertf128. AVX2 and AVX512 expect a single ymm vpminuw.
1555define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1556; SSE2-LABEL: min_le_v16i16:
1557; SSE2:       # %bb.0:
1558; SSE2-NEXT:    movdqa %xmm0, %xmm4
1559; SSE2-NEXT:    psubusw %xmm2, %xmm4
1560; SSE2-NEXT:    psubw %xmm4, %xmm0
1561; SSE2-NEXT:    movdqa %xmm1, %xmm2
1562; SSE2-NEXT:    psubusw %xmm3, %xmm2
1563; SSE2-NEXT:    psubw %xmm2, %xmm1
1564; SSE2-NEXT:    retq
1565;
1566; SSE41-LABEL: min_le_v16i16:
1567; SSE41:       # %bb.0:
1568; SSE41-NEXT:    pminuw %xmm2, %xmm0
1569; SSE41-NEXT:    pminuw %xmm3, %xmm1
1570; SSE41-NEXT:    retq
1571;
1572; SSE42-LABEL: min_le_v16i16:
1573; SSE42:       # %bb.0:
1574; SSE42-NEXT:    pminuw %xmm2, %xmm0
1575; SSE42-NEXT:    pminuw %xmm3, %xmm1
1576; SSE42-NEXT:    retq
1577;
1578; AVX1-LABEL: min_le_v16i16:
1579; AVX1:       # %bb.0:
1580; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1581; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1582; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
1583; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1584; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1585; AVX1-NEXT:    retq
1586;
1587; AVX2-LABEL: min_le_v16i16:
1588; AVX2:       # %bb.0:
1589; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
1590; AVX2-NEXT:    retq
1591;
1592; AVX512-LABEL: min_le_v16i16:
1593; AVX512:       # %bb.0:
1594; AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
1595; AVX512-NEXT:    retq
1596  %1 = icmp ule <16 x i16> %a, %b
1597  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
1598  ret <16 x i16> %2
1599}
1600
; min_le_v16i8: per-lane unsigned minimum of two <16 x i8> vectors, written in
; IR as `icmp ule` followed by `select`. Every SSE run line shares one check
; (a single pminub) and every AVX run line shares one check (a single vpminub).
1601define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1602; SSE-LABEL: min_le_v16i8:
1603; SSE:       # %bb.0:
1604; SSE-NEXT:    pminub %xmm1, %xmm0
1605; SSE-NEXT:    retq
1606;
1607; AVX-LABEL: min_le_v16i8:
1608; AVX:       # %bb.0:
1609; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1610; AVX-NEXT:    retq
1611  %1 = icmp ule <16 x i8> %a, %b
1612  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
1613  ret <16 x i8> %2
1614}
1615
; min_le_v32i8: per-lane unsigned minimum of two <32 x i8> vectors, written in
; IR as `icmp ule` followed by `select`. The SSE checks expect one pminub per
; xmm pair. AVX1 splits with vextractf128, uses two xmm vpminub and rejoins
; with vinsertf128. AVX2 and AVX512 expect a single ymm vpminub.
1616define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1617; SSE-LABEL: min_le_v32i8:
1618; SSE:       # %bb.0:
1619; SSE-NEXT:    pminub %xmm2, %xmm0
1620; SSE-NEXT:    pminub %xmm3, %xmm1
1621; SSE-NEXT:    retq
1622;
1623; AVX1-LABEL: min_le_v32i8:
1624; AVX1:       # %bb.0:
1625; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1626; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1627; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
1628; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1629; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1630; AVX1-NEXT:    retq
1631;
1632; AVX2-LABEL: min_le_v32i8:
1633; AVX2:       # %bb.0:
1634; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
1635; AVX2-NEXT:    retq
1636;
1637; AVX512-LABEL: min_le_v32i8:
1638; AVX512:       # %bb.0:
1639; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
1640; AVX512-NEXT:    retq
1641  %1 = icmp ule <32 x i8> %a, %b
1642  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1643  ret <32 x i8> %2
1644}
1645
1646;
1647; Constant Folding
1648;
1649
; max_gt_v2i64c: constant-folding check for the `icmp ugt` + `select` unsigned
; maximum on <2 x i64>. Both operands are constant vectors; each insertelement
; writes lane 0 with the value that lane already holds. Unsigned max of
; <-7, 7> and <-1, 1> is <-1, 7>, so every run line expects only a load of
; [18446744073709551615,7] followed by the return.
1650define <2 x i64> @max_gt_v2i64c() {
1651; SSE-LABEL: max_gt_v2i64c:
1652; SSE:       # %bb.0:
1653; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1654; SSE-NEXT:    retq
1655;
1656; AVX1-LABEL: max_gt_v2i64c:
1657; AVX1:       # %bb.0:
1658; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1659; AVX1-NEXT:    retq
1660;
1661; AVX2-LABEL: max_gt_v2i64c:
1662; AVX2:       # %bb.0:
1663; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1664; AVX2-NEXT:    retq
1665;
1666; AVX512-LABEL: max_gt_v2i64c:
1667; AVX512:       # %bb.0:
1668; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [18446744073709551615,7]
1669; AVX512-NEXT:    retq
1670  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1671  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1672  %3 = icmp ugt <2 x i64> %1, %2
1673  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
1674  ret <2 x i64> %4
1675}
1676
; max_gt_v4i64c: constant-folding check for the `icmp ugt` + `select` unsigned
; maximum on <4 x i64>. Both operands are constant vectors; each insertelement
; writes lane 0 with the value that lane already holds. The folded result is
; <-1, -1, 7, 7>. The SSE checks expect the low half as all-ones produced by
; pcmpeqd of xmm0 with itself and the high half [7,7] loaded into xmm1; the
; AVX checks expect a single ymm constant load.
1677define <4 x i64> @max_gt_v4i64c() {
1678; SSE-LABEL: max_gt_v4i64c:
1679; SSE:       # %bb.0:
1680; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,7]
1681; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
1682; SSE-NEXT:    retq
1683;
1684; AVX1-LABEL: max_gt_v4i64c:
1685; AVX1:       # %bb.0:
1686; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1687; AVX1-NEXT:    retq
1688;
1689; AVX2-LABEL: max_gt_v4i64c:
1690; AVX2:       # %bb.0:
1691; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1692; AVX2-NEXT:    retq
1693;
1694; AVX512-LABEL: max_gt_v4i64c:
1695; AVX512:       # %bb.0:
1696; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1697; AVX512-NEXT:    retq
1698  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1699  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1700  %3 = icmp ugt <4 x i64> %1, %2
1701  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
1702  ret <4 x i64> %4
1703}
1704
; max_gt_v4i32c: constant-folding check for the `icmp ugt` + `select` unsigned
; maximum on <4 x i32>. Both operands are constant vectors; each insertelement
; writes lane 0 with the value that lane already holds. The folded result is
; <-1, -1, 7, 7>, so every run line expects only a load of
; [4294967295,4294967295,7,7] followed by the return.
1705define <4 x i32> @max_gt_v4i32c() {
1706; SSE-LABEL: max_gt_v4i32c:
1707; SSE:       # %bb.0:
1708; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1709; SSE-NEXT:    retq
1710;
1711; AVX1-LABEL: max_gt_v4i32c:
1712; AVX1:       # %bb.0:
1713; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1714; AVX1-NEXT:    retq
1715;
1716; AVX2-LABEL: max_gt_v4i32c:
1717; AVX2:       # %bb.0:
1718; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1719; AVX2-NEXT:    retq
1720;
1721; AVX512-LABEL: max_gt_v4i32c:
1722; AVX512:       # %bb.0:
1723; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1724; AVX512-NEXT:    retq
1725  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1726  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1727  %3 = icmp ugt <4 x i32> %1, %2
1728  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
1729  ret <4 x i32> %4
1730}
1731
; max_gt_v8i32c: constant-folding check for the `icmp ugt` + `select` unsigned
; maximum on <8 x i32>. Both operands are constant vectors; each insertelement
; writes lane 0 with the value that lane already holds. The folded result is
; <-1, -3, -3, -1, 7, 5, 5, 7>. The SSE checks expect it as two xmm constant
; loads and the AVX checks as a single ymm constant load.
1732define <8 x i32> @max_gt_v8i32c() {
1733; SSE-LABEL: max_gt_v8i32c:
1734; SSE:       # %bb.0:
1735; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1736; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,5,5,7]
1737; SSE-NEXT:    retq
1738;
1739; AVX1-LABEL: max_gt_v8i32c:
1740; AVX1:       # %bb.0:
1741; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1742; AVX1-NEXT:    retq
1743;
1744; AVX2-LABEL: max_gt_v8i32c:
1745; AVX2:       # %bb.0:
1746; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1747; AVX2-NEXT:    retq
1748;
1749; AVX512-LABEL: max_gt_v8i32c:
1750; AVX512:       # %bb.0:
1751; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1752; AVX512-NEXT:    retq
1753  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1754  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1755  %3 = icmp ugt <8 x i32> %1, %2
1756  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
1757  ret <8 x i32> %4
1758}
1759
; max_gt_v8i16c: constant-folding check for the `icmp ugt` + `select` unsigned
; maximum on <8 x i16>. Both operands are constant vectors; each insertelement
; writes lane 0 with the value that lane already holds. The folded result is
; <-1, -3, -3, -1, 7, 5, 5, 7>, so the SSE and AVX checks each expect only a
; load of [65535,65533,65533,65535,7,5,5,7] followed by the return.
1760define <8 x i16> @max_gt_v8i16c() {
1761; SSE-LABEL: max_gt_v8i16c:
1762; SSE:       # %bb.0:
1763; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1764; SSE-NEXT:    retq
1765;
1766; AVX-LABEL: max_gt_v8i16c:
1767; AVX:       # %bb.0:
1768; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1769; AVX-NEXT:    retq
1770  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1771  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1772  %3 = icmp ugt <8 x i16> %1, %2
1773  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
1774  ret <8 x i16> %4
1775}
1776
; Unsigned maximum (icmp ugt + select) of two constant <16 x i16> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <16 x i16> @max_gt_v16i16c() {
; SSE-LABEL: max_gt_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %rhs = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %is_ugt = icmp ugt <16 x i16> %lhs, %rhs
  %umax = select <16 x i1> %is_ugt, <16 x i16> %lhs, <16 x i16> %rhs
  ret <16 x i16> %umax
}
1794
; Unsigned maximum (icmp ugt + select) of two constant <16 x i8> vectors.
; The checks expect a single constant load followed by the return.
define <16 x i8> @max_gt_v16i8c() {
; SSE-LABEL: max_gt_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %rhs = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %is_ugt = icmp ugt <16 x i8> %lhs, %rhs
  %umax = select <16 x i1> %is_ugt, <16 x i8> %lhs, <16 x i8> %rhs
  ret <16 x i8> %umax
}
1811
; Unsigned maximum (icmp uge + select) of two constant <2 x i64> vectors.
; The checks expect a single constant load followed by the return.
define <2 x i64> @max_ge_v2i64c() {
; SSE-LABEL: max_ge_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551615,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v2i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v2i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v2i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX512-NEXT:    retq
  %lhs = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %rhs = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %is_uge = icmp uge <2 x i64> %lhs, %rhs
  %umax = select <2 x i1> %is_uge, <2 x i64> %lhs, <2 x i64> %rhs
  ret <2 x i64> %umax
}
1838
; Unsigned maximum (icmp uge + select) of two constant <4 x i64> vectors.
; The low half of the result is all-ones; without 256-bit registers the
; checks expect it to be built with pcmpeqd and only the high half loaded.
define <4 x i64> @max_ge_v4i64c() {
; SSE-LABEL: max_ge_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,7]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v4i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v4i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v4i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %rhs = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %is_uge = icmp uge <4 x i64> %lhs, %rhs
  %umax = select <4 x i1> %is_uge, <4 x i64> %lhs, <4 x i64> %rhs
  ret <4 x i64> %umax
}
1866
; Unsigned maximum (icmp uge + select) of two constant <4 x i32> vectors.
; The checks expect a single constant load followed by the return.
define <4 x i32> @max_ge_v4i32c() {
; SSE-LABEL: max_ge_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v4i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v4i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v4i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %rhs = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %is_uge = icmp uge <4 x i32> %lhs, %rhs
  %umax = select <4 x i1> %is_uge, <4 x i32> %lhs, <4 x i32> %rhs
  ret <4 x i32> %umax
}
1893
; Unsigned maximum (icmp uge + select) of two constant <8 x i32> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <8 x i32> @max_ge_v8i32c() {
; SSE-LABEL: max_ge_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v8i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v8i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v8i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX512-NEXT:    retq
  %lhs = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %rhs = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %is_uge = icmp uge <8 x i32> %lhs, %rhs
  %umax = select <8 x i1> %is_uge, <8 x i32> %lhs, <8 x i32> %rhs
  ret <8 x i32> %umax
}
1921
; Unsigned maximum (icmp uge + select) of two constant <8 x i16> vectors.
; The checks expect a single constant load followed by the return.
define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT:    retq
  %lhs = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %rhs = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %is_uge = icmp uge <8 x i16> %lhs, %rhs
  %umax = select <8 x i1> %is_uge, <8 x i16> %lhs, <8 x i16> %rhs
  ret <8 x i16> %umax
}
1938
; Unsigned maximum (icmp uge + select) of two constant <16 x i16> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %rhs = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %is_uge = icmp uge <16 x i16> %lhs, %rhs
  %umax = select <16 x i1> %is_uge, <16 x i16> %lhs, <16 x i16> %rhs
  ret <16 x i16> %umax
}
1956
; Unsigned maximum (icmp uge + select) of two constant <16 x i8> vectors.
; The checks expect a single constant load followed by the return.
define <16 x i8> @max_ge_v16i8c() {
; SSE-LABEL: max_ge_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %rhs = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %is_uge = icmp uge <16 x i8> %lhs, %rhs
  %umax = select <16 x i1> %is_uge, <16 x i8> %lhs, <16 x i8> %rhs
  ret <16 x i8> %umax
}
1973
; Unsigned minimum (icmp ult + select) of two constant <2 x i64> vectors.
; The checks expect a single constant load followed by the return.
define <2 x i64> @min_lt_v2i64c() {
; SSE-LABEL: min_lt_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_lt_v2i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v2i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v2i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %rhs = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %is_ult = icmp ult <2 x i64> %lhs, %rhs
  %umin = select <2 x i1> %is_ult, <2 x i64> %lhs, <2 x i64> %rhs
  ret <2 x i64> %umin
}
2000
; Unsigned minimum (icmp ult + select) of two constant <4 x i64> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <4 x i64> @min_lt_v4i64c() {
; SSE-LABEL: min_lt_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_lt_v4i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v4i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v4i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %rhs = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %is_ult = icmp ult <4 x i64> %lhs, %rhs
  %umin = select <4 x i1> %is_ult, <4 x i64> %lhs, <4 x i64> %rhs
  ret <4 x i64> %umin
}
2028
; Unsigned minimum (icmp ult + select) of two constant <4 x i32> vectors.
; The checks expect a single constant load followed by the return.
define <4 x i32> @min_lt_v4i32c() {
; SSE-LABEL: min_lt_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_lt_v4i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v4i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v4i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %rhs = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %is_ult = icmp ult <4 x i32> %lhs, %rhs
  %umin = select <4 x i1> %is_ult, <4 x i32> %lhs, <4 x i32> %rhs
  ret <4 x i32> %umin
}
2055
; Unsigned minimum (icmp ult + select) of two constant <8 x i32> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <8 x i32> @min_lt_v8i32c() {
; SSE-LABEL: min_lt_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_lt_v8i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v8i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v8i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %rhs = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %is_ult = icmp ult <8 x i32> %lhs, %rhs
  %umin = select <8 x i1> %is_ult, <8 x i32> %lhs, <8 x i32> %rhs
  ret <8 x i32> %umin
}
2083
; Unsigned minimum (icmp ult + select) of two constant <8 x i16> vectors.
; Lane 0 of the second operand is overwritten with 1 (not -1), which is why
; lane 0 of the expected constant is 1.
define <8 x i16> @min_lt_v8i16c() {
; SSE-LABEL: min_lt_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %lhs = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %rhs = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
  %is_ult = icmp ult <8 x i16> %lhs, %rhs
  %umin = select <8 x i1> %is_ult, <8 x i16> %lhs, <8 x i16> %rhs
  ret <8 x i16> %umin
}
2100
; Unsigned minimum (icmp ult + select) of two constant <16 x i16> vectors.
; Lane 0 of the second operand is overwritten with 1 (not -1), which is why
; lane 0 of the expected constant is 1.
define <16 x i16> @min_lt_v16i16c() {
; SSE-LABEL: min_lt_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %rhs = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
  %is_ult = icmp ult <16 x i16> %lhs, %rhs
  %umin = select <16 x i1> %is_ult, <16 x i16> %lhs, <16 x i16> %rhs
  ret <16 x i16> %umin
}
2118
; Unsigned minimum (icmp ult + select) of two constant <16 x i8> vectors.
; Lane 0 of the second operand is overwritten with 1 (not -1), which is why
; lane 0 of the expected constant is 1.
define <16 x i8> @min_lt_v16i8c() {
; SSE-LABEL: min_lt_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %rhs = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
  %is_ult = icmp ult <16 x i8> %lhs, %rhs
  %umin = select <16 x i1> %is_ult, <16 x i8> %lhs, <16 x i8> %rhs
  ret <16 x i8> %umin
}
2135
; Unsigned minimum (icmp ule + select) of two constant <2 x i64> vectors.
; The checks expect a single constant load followed by the return.
define <2 x i64> @min_le_v2i64c() {
; SSE-LABEL: min_le_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_le_v2i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v2i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v2i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %rhs = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %is_ule = icmp ule <2 x i64> %lhs, %rhs
  %umin = select <2 x i1> %is_ule, <2 x i64> %lhs, <2 x i64> %rhs
  ret <2 x i64> %umin
}
2162
; Unsigned minimum (icmp ule + select) of two constant <4 x i64> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <4 x i64> @min_le_v4i64c() {
; SSE-LABEL: min_le_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_le_v4i64c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v4i64c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v4i64c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %rhs = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %is_ule = icmp ule <4 x i64> %lhs, %rhs
  %umin = select <4 x i1> %is_ule, <4 x i64> %lhs, <4 x i64> %rhs
  ret <4 x i64> %umin
}
2190
; Unsigned minimum (icmp ule + select) of two constant <4 x i32> vectors.
; The checks expect a single constant load followed by the return.
define <4 x i32> @min_le_v4i32c() {
; SSE-LABEL: min_le_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_le_v4i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v4i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v4i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %rhs = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %is_ule = icmp ule <4 x i32> %lhs, %rhs
  %umin = select <4 x i1> %is_ule, <4 x i32> %lhs, <4 x i32> %rhs
  ret <4 x i32> %umin
}
2217
; Unsigned minimum (icmp ule + select) of two constant <8 x i32> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <8 x i32> @min_le_v8i32c() {
; SSE-LABEL: min_le_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_le_v8i32c:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v8i32c:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v8i32c:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX512-NEXT:    retq
  %lhs = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %rhs = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %is_ule = icmp ule <8 x i32> %lhs, %rhs
  %umin = select <8 x i1> %is_ule, <8 x i32> %lhs, <8 x i32> %rhs
  ret <8 x i32> %umin
}
2245
; Unsigned minimum (icmp ule + select) of two constant <8 x i16> vectors.
; The checks expect a single constant load followed by the return.
define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %lhs = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %rhs = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %is_ule = icmp ule <8 x i16> %lhs, %rhs
  %umin = select <8 x i1> %is_ule, <8 x i16> %lhs, <8 x i16> %rhs
  ret <8 x i16> %umin
}
2262
; Unsigned minimum (icmp ule + select) of two constant <16 x i16> vectors.
; The checks expect only constant loads (two 128-bit halves without 256-bit
; registers) followed by the return.
define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %rhs = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %is_ule = icmp ule <16 x i16> %lhs, %rhs
  %umin = select <16 x i1> %is_ule, <16 x i16> %lhs, <16 x i16> %rhs
  ret <16 x i16> %umin
}
2280
; Unsigned minimum (icmp ule + select) of two constant <16 x i8> vectors.
; The checks expect a single constant load followed by the return.
define <16 x i8> @min_le_v16i8c() {
; SSE-LABEL: min_le_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %lhs = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %rhs = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %is_ule = icmp ule <16 x i8> %lhs, %rhs
  %umin = select <16 x i1> %is_ule, <16 x i8> %lhs, <16 x i8> %rhs
  ret <16 x i8> %umin
}
2297