xref: /llvm-project/llvm/test/CodeGen/X86/vector-compare-all_of.ll (revision 600a83bf9ba2bee5ed1e9867e201f7707b1d8102)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
7
; all_of reduction over a <2 x double> ordered greater-than compare.
; Each i1 compare lane is sign-extended to i64 (0 or -1), the two lanes are
; ANDed together via a shuffle+and step, and lane 0 is returned. The result is
; therefore -1 only when every lane compared true, and 0 otherwise.
; The assertions below show the expected lowering: a movmskpd + compare against
; the full lane mask on SSE targets, and a vtestpd + sbb sequence on AVX targets.
8define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
9; SSE-LABEL: test_v2f64_sext:
10; SSE:       # %bb.0:
11; SSE-NEXT:    cmpltpd %xmm0, %xmm1
12; SSE-NEXT:    movmskpd %xmm1, %ecx
13; SSE-NEXT:    xorl %eax, %eax
14; SSE-NEXT:    cmpl $3, %ecx
15; SSE-NEXT:    sete %al
16; SSE-NEXT:    negq %rax
17; SSE-NEXT:    retq
18;
19; AVX-LABEL: test_v2f64_sext:
20; AVX:       # %bb.0:
21; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
22; AVX-NEXT:    xorl %eax, %eax
23; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
24; AVX-NEXT:    vtestpd %xmm1, %xmm0
25; AVX-NEXT:    sbbq %rax, %rax
26; AVX-NEXT:    retq
; Lane-wise compare, then widen each i1 result to a 0 / -1 mask.
27  %c = fcmp ogt <2 x double> %a0, %a1
28  %s = sext <2 x i1> %c to <2 x i64>
; Bring lane 1 down to lane 0 and AND, so lane 0 holds the AND of both lanes.
29  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
30  %2 = and <2 x i64> %s, %1
31  %3 = extractelement <2 x i64> %2, i32 0
32  ret i64 %3
33}
34
; all_of reduction over a <4 x double> ordered greater-than compare.
; Each i1 compare lane is sign-extended to i64 (0 or -1) and the four lanes are
; folded into lane 0 by two shuffle+and steps (upper half onto lower half, then
; lane 1 onto lane 0). Returns -1 only when every lane compared true, else 0.
35define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
36; SSE-LABEL: test_v4f64_sext:
37; SSE:       # %bb.0:
38; SSE-NEXT:    cmpltpd %xmm1, %xmm3
39; SSE-NEXT:    cmpltpd %xmm0, %xmm2
40; SSE-NEXT:    andpd %xmm3, %xmm2
41; SSE-NEXT:    movmskpd %xmm2, %ecx
42; SSE-NEXT:    xorl %eax, %eax
43; SSE-NEXT:    cmpl $3, %ecx
44; SSE-NEXT:    sete %al
45; SSE-NEXT:    negq %rax
46; SSE-NEXT:    retq
47;
48; AVX1-LABEL: test_v4f64_sext:
49; AVX1:       # %bb.0:
50; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
51; AVX1-NEXT:    xorl %eax, %eax
52; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
53; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
54; AVX1-NEXT:    vtestpd %ymm1, %ymm0
55; AVX1-NEXT:    sbbq %rax, %rax
56; AVX1-NEXT:    vzeroupper
57; AVX1-NEXT:    retq
58;
59; AVX2-LABEL: test_v4f64_sext:
60; AVX2:       # %bb.0:
61; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
62; AVX2-NEXT:    xorl %eax, %eax
63; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
64; AVX2-NEXT:    vtestpd %ymm1, %ymm0
65; AVX2-NEXT:    sbbq %rax, %rax
66; AVX2-NEXT:    vzeroupper
67; AVX2-NEXT:    retq
68;
69; AVX512-LABEL: test_v4f64_sext:
70; AVX512:       # %bb.0:
71; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
72; AVX512-NEXT:    xorl %eax, %eax
73; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
74; AVX512-NEXT:    vtestpd %ymm1, %ymm0
75; AVX512-NEXT:    sbbq %rax, %rax
76; AVX512-NEXT:    vzeroupper
77; AVX512-NEXT:    retq
; Lane-wise compare, then widen each i1 result to a 0 / -1 mask.
78  %c = fcmp ogt <4 x double> %a0, %a1
79  %s = sext <4 x i1> %c to <4 x i64>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
80  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
81  %2 = and <4 x i64> %s, %1
82  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
83  %4 = and <4 x i64> %2, %3
84  %5 = extractelement <4 x i64> %4, i64 0
85  ret i64 %5
86}
87
; all_of reduction over a <4 x double> ordered greater-than compare, where the
; i1 lanes are sign-extended to the narrower <4 x i32> instead of <4 x i64>.
; The four i32 lanes are ANDed into lane 0 by two shuffle+and steps, and the
; extracted scalar is then sign-extended to i64. Returns -1 only when every lane
; compared true, else 0.
; NOTE(review): "legal" presumably refers to the 128-bit <4 x i32> mask type
; being legal on every tested target - confirm against the upstream test intent.
88define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
89; SSE-LABEL: test_v4f64_legal_sext:
90; SSE:       # %bb.0:
91; SSE-NEXT:    cmpltpd %xmm1, %xmm3
92; SSE-NEXT:    cmpltpd %xmm0, %xmm2
93; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
94; SSE-NEXT:    movmskps %xmm2, %ecx
95; SSE-NEXT:    xorl %eax, %eax
96; SSE-NEXT:    cmpl $15, %ecx
97; SSE-NEXT:    sete %al
98; SSE-NEXT:    negq %rax
99; SSE-NEXT:    retq
100;
101; AVX1OR2-LABEL: test_v4f64_legal_sext:
102; AVX1OR2:       # %bb.0:
103; AVX1OR2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
104; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm1
105; AVX1OR2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
106; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
107; AVX1OR2-NEXT:    xorl %eax, %eax
108; AVX1OR2-NEXT:    vtestps %xmm1, %xmm0
109; AVX1OR2-NEXT:    sbbq %rax, %rax
110; AVX1OR2-NEXT:    vzeroupper
111; AVX1OR2-NEXT:    retq
112;
113; AVX512-LABEL: test_v4f64_legal_sext:
114; AVX512:       # %bb.0:
115; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
116; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
117; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} {z}
118; AVX512-NEXT:    xorl %eax, %eax
119; AVX512-NEXT:    vtestps %xmm0, %xmm1
120; AVX512-NEXT:    sbbq %rax, %rax
121; AVX512-NEXT:    vzeroupper
122; AVX512-NEXT:    retq
; Lane-wise compare on 64-bit elements, widened only to a 32-bit 0 / -1 mask.
123  %c = fcmp ogt <4 x double> %a0, %a1
124  %s = sext <4 x i1> %c to <4 x i32>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
125  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
126  %2 = and <4 x i32> %s, %1
127  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
128  %4 = and <4 x i32> %2, %3
129  %5 = extractelement <4 x i32> %4, i64 0
; Widen the reduced i32 scalar to the i64 return type.
130  %6 = sext i32 %5 to i64
131  ret i64 %6
132}
133
; all_of reduction over a <4 x float> ordered greater-than compare.
; Each i1 compare lane is sign-extended to i32 (0 or -1) and the four lanes are
; folded into lane 0 by two shuffle+and steps. Returns -1 only when every lane
; compared true, else 0.
134define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
135; SSE-LABEL: test_v4f32_sext:
136; SSE:       # %bb.0:
137; SSE-NEXT:    cmpltps %xmm0, %xmm1
138; SSE-NEXT:    movmskps %xmm1, %ecx
139; SSE-NEXT:    xorl %eax, %eax
140; SSE-NEXT:    cmpl $15, %ecx
141; SSE-NEXT:    sete %al
142; SSE-NEXT:    negl %eax
143; SSE-NEXT:    retq
144;
145; AVX-LABEL: test_v4f32_sext:
146; AVX:       # %bb.0:
147; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
148; AVX-NEXT:    xorl %eax, %eax
149; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
150; AVX-NEXT:    vtestps %xmm1, %xmm0
151; AVX-NEXT:    sbbl %eax, %eax
152; AVX-NEXT:    retq
; Lane-wise compare, then widen each i1 result to a 0 / -1 mask.
153  %c = fcmp ogt <4 x float> %a0, %a1
154  %s = sext <4 x i1> %c to <4 x i32>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
155  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
156  %2 = and <4 x i32> %s, %1
157  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
158  %4 = and <4 x i32> %2, %3
159  %5 = extractelement <4 x i32> %4, i32 0
160  ret i32 %5
161}
162
; all_of reduction over an <8 x float> ordered greater-than compare.
; Each i1 compare lane is sign-extended to i32 (0 or -1) and the eight lanes are
; folded into lane 0 by three shuffle+and steps (halving the live lanes each
; time). Returns -1 only when every lane compared true, else 0.
163define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
164; SSE-LABEL: test_v8f32_sext:
165; SSE:       # %bb.0:
166; SSE-NEXT:    cmpltps %xmm1, %xmm3
167; SSE-NEXT:    cmpltps %xmm0, %xmm2
168; SSE-NEXT:    andps %xmm3, %xmm2
169; SSE-NEXT:    movmskps %xmm2, %ecx
170; SSE-NEXT:    xorl %eax, %eax
171; SSE-NEXT:    cmpl $15, %ecx
172; SSE-NEXT:    sete %al
173; SSE-NEXT:    negl %eax
174; SSE-NEXT:    retq
175;
176; AVX1-LABEL: test_v8f32_sext:
177; AVX1:       # %bb.0:
178; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
179; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
180; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
181; AVX1-NEXT:    xorl %eax, %eax
182; AVX1-NEXT:    vtestps %ymm1, %ymm0
183; AVX1-NEXT:    sbbl %eax, %eax
184; AVX1-NEXT:    vzeroupper
185; AVX1-NEXT:    retq
186;
187; AVX2-LABEL: test_v8f32_sext:
188; AVX2:       # %bb.0:
189; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
190; AVX2-NEXT:    xorl %eax, %eax
191; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
192; AVX2-NEXT:    vtestps %ymm1, %ymm0
193; AVX2-NEXT:    sbbl %eax, %eax
194; AVX2-NEXT:    vzeroupper
195; AVX2-NEXT:    retq
196;
197; AVX512-LABEL: test_v8f32_sext:
198; AVX512:       # %bb.0:
199; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
200; AVX512-NEXT:    xorl %eax, %eax
201; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
202; AVX512-NEXT:    vtestps %ymm1, %ymm0
203; AVX512-NEXT:    sbbl %eax, %eax
204; AVX512-NEXT:    vzeroupper
205; AVX512-NEXT:    retq
; Lane-wise compare, then widen each i1 result to a 0 / -1 mask.
206  %c = fcmp ogt <8 x float> %a0, %a1
207  %s = sext <8 x i1> %c to <8 x i32>
; Three shuffle+and steps fold all eight lanes into lane 0 (AND-reduction).
208  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
209  %2 = and <8 x i32> %s, %1
210  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
211  %4 = and <8 x i32> %2, %3
212  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
213  %6 = and <8 x i32> %4, %5
214  %7 = extractelement <8 x i32> %6, i32 0
215  ret i32 %7
216}
217
; all_of reduction over an <8 x float> ordered greater-than compare, where the
; i1 lanes are sign-extended to the narrower <8 x i16> instead of <8 x i32>.
; The eight i16 lanes are ANDed into lane 0 by three shuffle+and steps, and the
; extracted scalar is then sign-extended to i32. Returns -1 only when every lane
; compared true, else 0.
; NOTE(review): "legal" presumably refers to the 128-bit <8 x i16> mask type
; being legal on every tested target - confirm against the upstream test intent.
218define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
219; SSE-LABEL: test_v8f32_legal_sext:
220; SSE:       # %bb.0:
221; SSE-NEXT:    cmpltps %xmm1, %xmm3
222; SSE-NEXT:    cmpltps %xmm0, %xmm2
223; SSE-NEXT:    packssdw %xmm3, %xmm2
224; SSE-NEXT:    pmovmskb %xmm2, %ecx
225; SSE-NEXT:    xorl %eax, %eax
226; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
227; SSE-NEXT:    sete %al
228; SSE-NEXT:    negl %eax
229; SSE-NEXT:    retq
230;
231; AVX1OR2-LABEL: test_v8f32_legal_sext:
232; AVX1OR2:       # %bb.0:
233; AVX1OR2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
234; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm1
235; AVX1OR2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
236; AVX1OR2-NEXT:    vpmovmskb %xmm0, %ecx
237; AVX1OR2-NEXT:    xorl %eax, %eax
238; AVX1OR2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
239; AVX1OR2-NEXT:    sete %al
240; AVX1OR2-NEXT:    negl %eax
241; AVX1OR2-NEXT:    vzeroupper
242; AVX1OR2-NEXT:    retq
243;
244; AVX512-LABEL: test_v8f32_legal_sext:
245; AVX512:       # %bb.0:
246; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
247; AVX512-NEXT:    vpmovm2w %k0, %xmm0
248; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
249; AVX512-NEXT:    xorl %eax, %eax
250; AVX512-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
251; AVX512-NEXT:    sete %al
252; AVX512-NEXT:    negl %eax
253; AVX512-NEXT:    vzeroupper
254; AVX512-NEXT:    retq
; Lane-wise compare on 32-bit elements, widened only to a 16-bit 0 / -1 mask.
255  %c = fcmp ogt <8 x float> %a0, %a1
256  %s = sext <8 x i1> %c to <8 x i16>
; Three shuffle+and steps fold all eight lanes into lane 0 (AND-reduction).
257  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
258  %2 = and <8 x i16> %s, %1
259  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
260  %4 = and <8 x i16> %2, %3
261  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
262  %6 = and <8 x i16> %4, %5
263  %7 = extractelement <8 x i16> %6, i32 0
; Widen the reduced i16 scalar to the i32 return type.
264  %8 = sext i16 %7 to i32
265  ret i32 %8
266}
267
; all_of reduction over a <2 x i64> signed greater-than compare.
; Each i1 compare lane is sign-extended to i64 (0 or -1), the two lanes are
; ANDed together via a shuffle+and step, and lane 0 is returned. Returns -1 only
; when every lane compared true, else 0.
; The SSE2 assertions show the 64-bit compare being emulated with 32-bit
; pcmpgtd/pcmpeqd, while the SSE4.2 and AVX assertions use (v)pcmpgtq directly.
268define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
269; SSE2-LABEL: test_v2i64_sext:
270; SSE2:       # %bb.0:
271; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
272; SSE2-NEXT:    pxor %xmm2, %xmm1
273; SSE2-NEXT:    pxor %xmm2, %xmm0
274; SSE2-NEXT:    movdqa %xmm0, %xmm2
275; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
276; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
277; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278; SSE2-NEXT:    pand %xmm2, %xmm1
279; SSE2-NEXT:    por %xmm0, %xmm1
280; SSE2-NEXT:    movmskpd %xmm1, %ecx
281; SSE2-NEXT:    xorl %eax, %eax
282; SSE2-NEXT:    cmpl $3, %ecx
283; SSE2-NEXT:    sete %al
284; SSE2-NEXT:    negq %rax
285; SSE2-NEXT:    retq
286;
287; SSE42-LABEL: test_v2i64_sext:
288; SSE42:       # %bb.0:
289; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
290; SSE42-NEXT:    movmskpd %xmm0, %ecx
291; SSE42-NEXT:    xorl %eax, %eax
292; SSE42-NEXT:    cmpl $3, %ecx
293; SSE42-NEXT:    sete %al
294; SSE42-NEXT:    negq %rax
295; SSE42-NEXT:    retq
296;
297; AVX-LABEL: test_v2i64_sext:
298; AVX:       # %bb.0:
299; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
300; AVX-NEXT:    xorl %eax, %eax
301; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
302; AVX-NEXT:    vtestpd %xmm1, %xmm0
303; AVX-NEXT:    sbbq %rax, %rax
304; AVX-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
305  %c = icmp sgt <2 x i64> %a0, %a1
306  %s = sext <2 x i1> %c to <2 x i64>
; Bring lane 1 down to lane 0 and AND, so lane 0 holds the AND of both lanes.
307  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
308  %2 = and <2 x i64> %s, %1
309  %3 = extractelement <2 x i64> %2, i32 0
310  ret i64 %3
311}
312
; all_of reduction over a <4 x i64> signed greater-than compare.
; Each i1 compare lane is sign-extended to i64 (0 or -1) and the four lanes are
; folded into lane 0 by two shuffle+and steps. Returns -1 only when every lane
; compared true, else 0.
313define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
314; SSE2-LABEL: test_v4i64_sext:
315; SSE2:       # %bb.0:
316; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
317; SSE2-NEXT:    pxor %xmm4, %xmm3
318; SSE2-NEXT:    pxor %xmm4, %xmm1
319; SSE2-NEXT:    movdqa %xmm1, %xmm5
320; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
321; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
322; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
323; SSE2-NEXT:    pand %xmm5, %xmm3
324; SSE2-NEXT:    por %xmm1, %xmm3
325; SSE2-NEXT:    pxor %xmm4, %xmm2
326; SSE2-NEXT:    pxor %xmm4, %xmm0
327; SSE2-NEXT:    movdqa %xmm0, %xmm1
328; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
329; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
330; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
331; SSE2-NEXT:    pand %xmm1, %xmm2
332; SSE2-NEXT:    por %xmm0, %xmm2
333; SSE2-NEXT:    pand %xmm3, %xmm2
334; SSE2-NEXT:    movmskpd %xmm2, %ecx
335; SSE2-NEXT:    xorl %eax, %eax
336; SSE2-NEXT:    cmpl $3, %ecx
337; SSE2-NEXT:    sete %al
338; SSE2-NEXT:    negq %rax
339; SSE2-NEXT:    retq
340;
341; SSE42-LABEL: test_v4i64_sext:
342; SSE42:       # %bb.0:
343; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
344; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
345; SSE42-NEXT:    pand %xmm1, %xmm0
346; SSE42-NEXT:    movmskpd %xmm0, %ecx
347; SSE42-NEXT:    xorl %eax, %eax
348; SSE42-NEXT:    cmpl $3, %ecx
349; SSE42-NEXT:    sete %al
350; SSE42-NEXT:    negq %rax
351; SSE42-NEXT:    retq
352;
353; AVX1-LABEL: test_v4i64_sext:
354; AVX1:       # %bb.0:
355; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
356; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
357; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
358; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
359; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
360; AVX1-NEXT:    xorl %eax, %eax
361; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
362; AVX1-NEXT:    vtestpd %xmm1, %xmm0
363; AVX1-NEXT:    sbbq %rax, %rax
364; AVX1-NEXT:    vzeroupper
365; AVX1-NEXT:    retq
366;
367; AVX2-LABEL: test_v4i64_sext:
368; AVX2:       # %bb.0:
369; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
370; AVX2-NEXT:    xorl %eax, %eax
371; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
372; AVX2-NEXT:    vtestpd %ymm1, %ymm0
373; AVX2-NEXT:    sbbq %rax, %rax
374; AVX2-NEXT:    vzeroupper
375; AVX2-NEXT:    retq
376;
377; AVX512-LABEL: test_v4i64_sext:
378; AVX512:       # %bb.0:
379; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
380; AVX512-NEXT:    xorl %eax, %eax
381; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
382; AVX512-NEXT:    vtestpd %ymm1, %ymm0
383; AVX512-NEXT:    sbbq %rax, %rax
384; AVX512-NEXT:    vzeroupper
385; AVX512-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
386  %c = icmp sgt <4 x i64> %a0, %a1
387  %s = sext <4 x i1> %c to <4 x i64>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
388  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
389  %2 = and <4 x i64> %s, %1
390  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
391  %4 = and <4 x i64> %2, %3
392  %5 = extractelement <4 x i64> %4, i64 0
393  ret i64 %5
394}
395
; all_of reduction over a <4 x i64> signed greater-than compare, where the i1
; lanes are sign-extended to the narrower <4 x i32> instead of <4 x i64>.
; The four i32 lanes are ANDed into lane 0 by two shuffle+and steps, and the
; extracted scalar is then sign-extended to i64. Returns -1 only when every lane
; compared true, else 0.
; NOTE(review): "legal" presumably refers to the 128-bit <4 x i32> mask type
; being legal on every tested target - confirm against the upstream test intent.
396define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
397; SSE2-LABEL: test_v4i64_legal_sext:
398; SSE2:       # %bb.0:
399; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
400; SSE2-NEXT:    pxor %xmm4, %xmm3
401; SSE2-NEXT:    pxor %xmm4, %xmm1
402; SSE2-NEXT:    movdqa %xmm1, %xmm5
403; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
404; SSE2-NEXT:    pxor %xmm4, %xmm2
405; SSE2-NEXT:    pxor %xmm4, %xmm0
406; SSE2-NEXT:    movdqa %xmm0, %xmm4
407; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
408; SSE2-NEXT:    movdqa %xmm4, %xmm6
409; SSE2-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
410; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
411; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
412; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
413; SSE2-NEXT:    andps %xmm6, %xmm0
414; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
415; SSE2-NEXT:    orps %xmm0, %xmm4
416; SSE2-NEXT:    movmskps %xmm4, %ecx
417; SSE2-NEXT:    xorl %eax, %eax
418; SSE2-NEXT:    cmpl $15, %ecx
419; SSE2-NEXT:    sete %al
420; SSE2-NEXT:    negq %rax
421; SSE2-NEXT:    retq
422;
423; SSE42-LABEL: test_v4i64_legal_sext:
424; SSE42:       # %bb.0:
425; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
426; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
427; SSE42-NEXT:    packssdw %xmm1, %xmm0
428; SSE42-NEXT:    movmskps %xmm0, %ecx
429; SSE42-NEXT:    xorl %eax, %eax
430; SSE42-NEXT:    cmpl $15, %ecx
431; SSE42-NEXT:    sete %al
432; SSE42-NEXT:    negq %rax
433; SSE42-NEXT:    retq
434;
435; AVX1-LABEL: test_v4i64_legal_sext:
436; AVX1:       # %bb.0:
437; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
438; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
439; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
440; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
441; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
442; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
443; AVX1-NEXT:    xorl %eax, %eax
444; AVX1-NEXT:    vtestps %xmm1, %xmm0
445; AVX1-NEXT:    sbbq %rax, %rax
446; AVX1-NEXT:    vzeroupper
447; AVX1-NEXT:    retq
448;
449; AVX2-LABEL: test_v4i64_legal_sext:
450; AVX2:       # %bb.0:
451; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
452; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
453; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
454; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
455; AVX2-NEXT:    xorl %eax, %eax
456; AVX2-NEXT:    vtestps %xmm1, %xmm0
457; AVX2-NEXT:    sbbq %rax, %rax
458; AVX2-NEXT:    vzeroupper
459; AVX2-NEXT:    retq
460;
461; AVX512-LABEL: test_v4i64_legal_sext:
462; AVX512:       # %bb.0:
463; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
464; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
465; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} {z}
466; AVX512-NEXT:    xorl %eax, %eax
467; AVX512-NEXT:    vtestps %xmm0, %xmm1
468; AVX512-NEXT:    sbbq %rax, %rax
469; AVX512-NEXT:    vzeroupper
470; AVX512-NEXT:    retq
; Lane-wise signed compare on 64-bit elements, widened only to a 32-bit mask.
471  %c = icmp sgt <4 x i64> %a0, %a1
472  %s = sext <4 x i1> %c to <4 x i32>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
473  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
474  %2 = and <4 x i32> %s, %1
475  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
476  %4 = and <4 x i32> %2, %3
477  %5 = extractelement <4 x i32> %4, i64 0
; Widen the reduced i32 scalar to the i64 return type.
478  %6 = sext i32 %5 to i64
479  ret i64 %6
480}
481
; all_of reduction over a <4 x i32> signed greater-than compare.
; Each i1 compare lane is sign-extended to i32 (0 or -1) and the four lanes are
; folded into lane 0 by two shuffle+and steps. Returns -1 only when every lane
; compared true, else 0.
482define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
483; SSE-LABEL: test_v4i32_sext:
484; SSE:       # %bb.0:
485; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
486; SSE-NEXT:    movmskps %xmm0, %ecx
487; SSE-NEXT:    xorl %eax, %eax
488; SSE-NEXT:    cmpl $15, %ecx
489; SSE-NEXT:    sete %al
490; SSE-NEXT:    negl %eax
491; SSE-NEXT:    retq
492;
493; AVX-LABEL: test_v4i32_sext:
494; AVX:       # %bb.0:
495; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
496; AVX-NEXT:    xorl %eax, %eax
497; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
498; AVX-NEXT:    vtestps %xmm1, %xmm0
499; AVX-NEXT:    sbbl %eax, %eax
500; AVX-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
501  %c = icmp sgt <4 x i32> %a0, %a1
502  %s = sext <4 x i1> %c to <4 x i32>
; Two shuffle+and steps fold all four lanes into lane 0 (AND-reduction).
503  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
504  %2 = and <4 x i32> %s, %1
505  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
506  %4 = and <4 x i32> %2, %3
507  %5 = extractelement <4 x i32> %4, i32 0
508  ret i32 %5
509}
510
; all_of reduction over an <8 x i32> signed greater-than compare.
; Each i1 compare lane is sign-extended to i32 (0 or -1) and the eight lanes are
; folded into lane 0 by three shuffle+and steps (halving the live lanes each
; time). Returns -1 only when every lane compared true, else 0.
511define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
512; SSE-LABEL: test_v8i32_sext:
513; SSE:       # %bb.0:
514; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
515; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
516; SSE-NEXT:    pand %xmm1, %xmm0
517; SSE-NEXT:    movmskps %xmm0, %ecx
518; SSE-NEXT:    xorl %eax, %eax
519; SSE-NEXT:    cmpl $15, %ecx
520; SSE-NEXT:    sete %al
521; SSE-NEXT:    negl %eax
522; SSE-NEXT:    retq
523;
524; AVX1-LABEL: test_v8i32_sext:
525; AVX1:       # %bb.0:
526; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
527; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
528; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
529; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
530; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
531; AVX1-NEXT:    xorl %eax, %eax
532; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
533; AVX1-NEXT:    vtestps %xmm1, %xmm0
534; AVX1-NEXT:    sbbl %eax, %eax
535; AVX1-NEXT:    vzeroupper
536; AVX1-NEXT:    retq
537;
538; AVX2-LABEL: test_v8i32_sext:
539; AVX2:       # %bb.0:
540; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
541; AVX2-NEXT:    xorl %eax, %eax
542; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
543; AVX2-NEXT:    vtestps %ymm1, %ymm0
544; AVX2-NEXT:    sbbl %eax, %eax
545; AVX2-NEXT:    vzeroupper
546; AVX2-NEXT:    retq
547;
548; AVX512-LABEL: test_v8i32_sext:
549; AVX512:       # %bb.0:
550; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
551; AVX512-NEXT:    xorl %eax, %eax
552; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
553; AVX512-NEXT:    vtestps %ymm1, %ymm0
554; AVX512-NEXT:    sbbl %eax, %eax
555; AVX512-NEXT:    vzeroupper
556; AVX512-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
557  %c = icmp sgt <8 x i32> %a0, %a1
558  %s = sext <8 x i1> %c to <8 x i32>
; Three shuffle+and steps fold all eight lanes into lane 0 (AND-reduction).
559  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
560  %2 = and <8 x i32> %s, %1
561  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
562  %4 = and <8 x i32> %2, %3
563  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
564  %6 = and <8 x i32> %4, %5
565  %7 = extractelement <8 x i32> %6, i32 0
566  ret i32 %7
567}
568
; all_of reduction over an <8 x i32> signed greater-than compare, where the i1
; lanes are sign-extended to the narrower <8 x i16> instead of <8 x i32>.
; The eight i16 lanes are ANDed into lane 0 by three shuffle+and steps, and the
; extracted scalar is then sign-extended to i32. Returns -1 only when every lane
; compared true, else 0.
; NOTE(review): "legal" presumably refers to the 128-bit <8 x i16> mask type
; being legal on every tested target - confirm against the upstream test intent.
569define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
570; SSE-LABEL: test_v8i32_legal_sext:
571; SSE:       # %bb.0:
572; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
573; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
574; SSE-NEXT:    packssdw %xmm1, %xmm0
575; SSE-NEXT:    pmovmskb %xmm0, %ecx
576; SSE-NEXT:    xorl %eax, %eax
577; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
578; SSE-NEXT:    sete %al
579; SSE-NEXT:    negl %eax
580; SSE-NEXT:    retq
581;
582; AVX1-LABEL: test_v8i32_legal_sext:
583; AVX1:       # %bb.0:
584; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
585; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
586; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
587; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
588; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
589; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
590; AVX1-NEXT:    xorl %eax, %eax
591; AVX1-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
592; AVX1-NEXT:    sete %al
593; AVX1-NEXT:    negl %eax
594; AVX1-NEXT:    vzeroupper
595; AVX1-NEXT:    retq
596;
597; AVX2-LABEL: test_v8i32_legal_sext:
598; AVX2:       # %bb.0:
599; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
600; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
601; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
602; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
603; AVX2-NEXT:    xorl %eax, %eax
604; AVX2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
605; AVX2-NEXT:    sete %al
606; AVX2-NEXT:    negl %eax
607; AVX2-NEXT:    vzeroupper
608; AVX2-NEXT:    retq
609;
610; AVX512-LABEL: test_v8i32_legal_sext:
611; AVX512:       # %bb.0:
612; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
613; AVX512-NEXT:    vpmovm2w %k0, %xmm0
614; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
615; AVX512-NEXT:    xorl %eax, %eax
616; AVX512-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
617; AVX512-NEXT:    sete %al
618; AVX512-NEXT:    negl %eax
619; AVX512-NEXT:    vzeroupper
620; AVX512-NEXT:    retq
; Lane-wise signed compare on 32-bit elements, widened only to a 16-bit mask.
621  %c = icmp sgt <8 x i32> %a0, %a1
622  %s = sext <8 x i1> %c to <8 x i16>
; Three shuffle+and steps fold all eight lanes into lane 0 (AND-reduction).
623  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
624  %2 = and <8 x i16> %s, %1
625  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
626  %4 = and <8 x i16> %2, %3
627  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
628  %6 = and <8 x i16> %4, %5
629  %7 = extractelement <8 x i16> %6, i32 0
; Widen the reduced i16 scalar to the i32 return type.
630  %8 = sext i16 %7 to i32
631  ret i32 %8
632}
633
; all_of reduction over an <8 x i16> signed greater-than compare.
; Each i1 compare lane is sign-extended to i16 (0 or -1) and the eight lanes are
; folded into lane 0 by three shuffle+and steps. Returns -1 only when every lane
; compared true, else 0.
634define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
635; SSE-LABEL: test_v8i16_sext:
636; SSE:       # %bb.0:
637; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
638; SSE-NEXT:    pmovmskb %xmm0, %ecx
639; SSE-NEXT:    xorl %eax, %eax
640; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
641; SSE-NEXT:    sete %al
642; SSE-NEXT:    negl %eax
643; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
644; SSE-NEXT:    retq
645;
646; AVX-LABEL: test_v8i16_sext:
647; AVX:       # %bb.0:
648; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
649; AVX-NEXT:    vpmovmskb %xmm0, %ecx
650; AVX-NEXT:    xorl %eax, %eax
651; AVX-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
652; AVX-NEXT:    sete %al
653; AVX-NEXT:    negl %eax
654; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
655; AVX-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
656  %c = icmp sgt <8 x i16> %a0, %a1
657  %s = sext <8 x i1> %c to <8 x i16>
; Three shuffle+and steps fold all eight lanes into lane 0 (AND-reduction).
658  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
659  %2 = and <8 x i16> %s, %1
660  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
661  %4 = and <8 x i16> %2, %3
662  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
663  %6 = and <8 x i16> %4, %5
664  %7 = extractelement <8 x i16> %6, i32 0
665  ret i16 %7
666}
667
; all_of reduction over a <16 x i16> signed greater-than compare.
; Each i1 compare lane is sign-extended to i16 (0 or -1) and the sixteen lanes
; are folded into lane 0 by four shuffle+and steps (halving the live lanes each
; time). Returns -1 only when every lane compared true, else 0.
668define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
669; SSE-LABEL: test_v16i16_sext:
670; SSE:       # %bb.0:
671; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
672; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
673; SSE-NEXT:    pand %xmm1, %xmm0
674; SSE-NEXT:    pmovmskb %xmm0, %ecx
675; SSE-NEXT:    xorl %eax, %eax
676; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
677; SSE-NEXT:    sete %al
678; SSE-NEXT:    negl %eax
679; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
680; SSE-NEXT:    retq
681;
682; AVX1-LABEL: test_v16i16_sext:
683; AVX1:       # %bb.0:
684; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
685; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
686; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
687; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
688; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
689; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
690; AVX1-NEXT:    xorl %eax, %eax
691; AVX1-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
692; AVX1-NEXT:    sete %al
693; AVX1-NEXT:    negl %eax
694; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
695; AVX1-NEXT:    vzeroupper
696; AVX1-NEXT:    retq
697;
698; AVX2-LABEL: test_v16i16_sext:
699; AVX2:       # %bb.0:
700; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
701; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
702; AVX2-NEXT:    xorl %eax, %eax
703; AVX2-NEXT:    cmpl $-1, %ecx
704; AVX2-NEXT:    sete %al
705; AVX2-NEXT:    negl %eax
706; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
707; AVX2-NEXT:    vzeroupper
708; AVX2-NEXT:    retq
709;
710; AVX512-LABEL: test_v16i16_sext:
711; AVX512:       # %bb.0:
712; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
713; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
714; AVX512-NEXT:    xorl %eax, %eax
715; AVX512-NEXT:    cmpl $-1, %ecx
716; AVX512-NEXT:    sete %al
717; AVX512-NEXT:    negl %eax
718; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
719; AVX512-NEXT:    vzeroupper
720; AVX512-NEXT:    retq
; Lane-wise signed compare, then widen each i1 result to a 0 / -1 mask.
721  %c = icmp sgt <16 x i16> %a0, %a1
722  %s = sext <16 x i1> %c to <16 x i16>
; Four shuffle+and steps fold all sixteen lanes into lane 0 (AND-reduction).
723  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
724  %2 = and <16 x i16> %s, %1
725  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
726  %4 = and <16 x i16> %2, %3
727  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
728  %6 = and <16 x i16> %4, %5
729  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
730  %8 = and <16 x i16> %6, %7
731  %9 = extractelement <16 x i16> %8, i32 0
732  ret i16 %9
733}
734
; all_of reduction over a <16 x i16> signed greater-than compare, where the i1
; lanes are sign-extended to the narrower <16 x i8> instead of <16 x i16>.
; The sixteen i8 lanes are ANDed into lane 0 by four shuffle+and steps, and the
; extracted scalar is then sign-extended to i16. Returns -1 only when every lane
; compared true, else 0.
; NOTE(review): "legal" presumably refers to the 128-bit <16 x i8> mask type
; being legal on every tested target - confirm against the upstream test intent.
735define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
736; SSE-LABEL: test_v16i16_legal_sext:
737; SSE:       # %bb.0:
738; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
739; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
740; SSE-NEXT:    packsswb %xmm1, %xmm0
741; SSE-NEXT:    pmovmskb %xmm0, %eax
742; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
743; SSE-NEXT:    sete %al
744; SSE-NEXT:    negb %al
745; SSE-NEXT:    movsbl %al, %eax
746; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
747; SSE-NEXT:    retq
748;
749; AVX1-LABEL: test_v16i16_legal_sext:
750; AVX1:       # %bb.0:
751; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
752; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
753; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
754; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
755; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
756; AVX1-NEXT:    vpmovmskb %xmm0, %eax
757; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
758; AVX1-NEXT:    sete %al
759; AVX1-NEXT:    negb %al
760; AVX1-NEXT:    movsbl %al, %eax
761; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
762; AVX1-NEXT:    vzeroupper
763; AVX1-NEXT:    retq
764;
765; AVX2-LABEL: test_v16i16_legal_sext:
766; AVX2:       # %bb.0:
767; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
768; AVX2-NEXT:    vpmovmskb %ymm0, %eax
769; AVX2-NEXT:    cmpl $-1, %eax
770; AVX2-NEXT:    sete %al
771; AVX2-NEXT:    negb %al
772; AVX2-NEXT:    movsbl %al, %eax
773; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
774; AVX2-NEXT:    vzeroupper
775; AVX2-NEXT:    retq
776;
777; AVX512-LABEL: test_v16i16_legal_sext:
778; AVX512:       # %bb.0:
779; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
780; AVX512-NEXT:    vpmovm2b %k0, %xmm0
781; AVX512-NEXT:    vpmovmskb %xmm0, %eax
782; AVX512-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
783; AVX512-NEXT:    sete %al
784; AVX512-NEXT:    negb %al
785; AVX512-NEXT:    movsbl %al, %eax
786; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
787; AVX512-NEXT:    vzeroupper
788; AVX512-NEXT:    retq
; Lane-wise signed compare on 16-bit elements, widened only to an 8-bit mask.
789  %c  = icmp sgt <16 x i16> %a0, %a1
790  %s  = sext <16 x i1> %c to <16 x i8>
; Four shuffle+and steps fold all sixteen lanes into lane 0 (AND-reduction).
791  %1  = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
792  %2  = and <16 x i8> %s, %1
793  %3  = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
794  %4  = and <16 x i8> %2, %3
795  %5  = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
796  %6  = and <16 x i8> %4, %5
797  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
798  %8  = and <16 x i8> %6, %7
799  %9  = extractelement <16 x i8> %8, i32 0
; Widen the reduced i8 scalar to the i16 return type.
800  %10 = sext i8 %9 to i16
801  ret i16 %10
802}
803
; All-of reduction of a signed greater-than compare over 16 x i8 lanes.
; The i1 compare result is sign-extended to i8 and folded with a shuffle+and
; tree that halves the live lanes each step (8, 4, 2, 1); lane 0 is returned.
; The checks expect one byte compare, a byte movmsk, an equality test of the
; mask against 0xFFFF, and sete/negb to form the i8 result.
804define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
805; SSE-LABEL: test_v16i8_sext:
806; SSE:       # %bb.0:
807; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
808; SSE-NEXT:    pmovmskb %xmm0, %eax
809; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
810; SSE-NEXT:    sete %al
811; SSE-NEXT:    negb %al
812; SSE-NEXT:    retq
813;
814; AVX-LABEL: test_v16i8_sext:
815; AVX:       # %bb.0:
816; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
817; AVX-NEXT:    vpmovmskb %xmm0, %eax
818; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
819; AVX-NEXT:    sete %al
820; AVX-NEXT:    negb %al
821; AVX-NEXT:    retq
822  %c = icmp sgt <16 x i8> %a0, %a1
823  %s = sext <16 x i1> %c to <16 x i8>
824  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
825  %2 = and <16 x i8> %s, %1
826  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
827  %4 = and <16 x i8> %2, %3
828  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
829  %6 = and <16 x i8> %4, %5
830  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
831  %8 = and <16 x i8> %6, %7
832  %9 = extractelement <16 x i8> %8, i32 0
833  ret i8 %9
834}
835
836define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
837; SSE-LABEL: test_v32i8_sext:
838; SSE:       # %bb.0:
839; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
840; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
841; SSE-NEXT:    pand %xmm1, %xmm0
842; SSE-NEXT:    pmovmskb %xmm0, %eax
843; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
844; SSE-NEXT:    sete %al
845; SSE-NEXT:    negb %al
846; SSE-NEXT:    retq
847;
848; AVX1-LABEL: test_v32i8_sext:
849; AVX1:       # %bb.0:
850; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
851; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
852; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
853; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
854; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
855; AVX1-NEXT:    vpmovmskb %xmm0, %eax
856; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
857; AVX1-NEXT:    sete %al
858; AVX1-NEXT:    negb %al
859; AVX1-NEXT:    vzeroupper
860; AVX1-NEXT:    retq
861;
862; AVX2-LABEL: test_v32i8_sext:
863; AVX2:       # %bb.0:
864; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
865; AVX2-NEXT:    vpmovmskb %ymm0, %eax
866; AVX2-NEXT:    cmpl $-1, %eax
867; AVX2-NEXT:    sete %al
868; AVX2-NEXT:    negb %al
869; AVX2-NEXT:    vzeroupper
870; AVX2-NEXT:    retq
871;
872; AVX512-LABEL: test_v32i8_sext:
873; AVX512:       # %bb.0:
874; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
875; AVX512-NEXT:    vpmovmskb %ymm0, %eax
876; AVX512-NEXT:    cmpl $-1, %eax
877; AVX512-NEXT:    sete %al
878; AVX512-NEXT:    negb %al
879; AVX512-NEXT:    vzeroupper
880; AVX512-NEXT:    retq
881  %c  = icmp sgt <32 x i8> %a0, %a1
882  %s  = sext <32 x i1> %c to <32 x i8>
883  %1  = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
884  %2  = and <32 x i8> %s, %1
885  %3  = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
886  %4  = and <32 x i8> %2, %3
887  %5  = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
888  %6  = and <32 x i8> %4, %5
889  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
890  %8  = and <32 x i8> %6, %7
891  %9  = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
892  %10 = and <32 x i8> %8, %9
893  %11 = extractelement <32 x i8> %10, i32 0
894  ret i8 %11
895}
896
897; Should not fold "MOVMSK(PCMPEQ(..)) -> PTESTZ(..)" when the cmp result has multiple uses.
; Byte-wise equality of two 32 x i8 vectors, bitcast to a 32-bit lane mask %b.
; %b is used twice: in the compare against -1 (all lanes equal) and as the
; false operand of the select, so the result is 16 when every lane matches
; and the raw mask otherwise. %z is not referenced by the body.
; Every run is expected to materialize the mask in a general-purpose register
; (movmsk or kmovd) and choose between it and 16 with a conditional move.
898define i32 @test_v32i8_muti_uses(<32 x i8> %x, <32 x i8>%y, i32 %z) {
899; SSE-LABEL: test_v32i8_muti_uses:
900; SSE:       # %bb.0:
901; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
902; SSE-NEXT:    pmovmskb %xmm0, %eax
903; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
904; SSE-NEXT:    pmovmskb %xmm1, %ecx
905; SSE-NEXT:    shll $16, %ecx
906; SSE-NEXT:    orl %eax, %ecx
907; SSE-NEXT:    cmpl $-1, %ecx
908; SSE-NEXT:    movl $16, %eax
909; SSE-NEXT:    cmovnel %ecx, %eax
910; SSE-NEXT:    retq
911;
912; AVX1-LABEL: test_v32i8_muti_uses:
913; AVX1:       # %bb.0:
914; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm2
915; AVX1-NEXT:    vpmovmskb %xmm2, %eax
916; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
917; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
918; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
919; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
920; AVX1-NEXT:    shll $16, %ecx
921; AVX1-NEXT:    orl %eax, %ecx
922; AVX1-NEXT:    cmpl $-1, %ecx
923; AVX1-NEXT:    movl $16, %eax
924; AVX1-NEXT:    cmovnel %ecx, %eax
925; AVX1-NEXT:    vzeroupper
926; AVX1-NEXT:    retq
927;
928; AVX2-LABEL: test_v32i8_muti_uses:
929; AVX2:       # %bb.0:
930; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
931; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
932; AVX2-NEXT:    cmpl $-1, %ecx
933; AVX2-NEXT:    movl $16, %eax
934; AVX2-NEXT:    cmovnel %ecx, %eax
935; AVX2-NEXT:    vzeroupper
936; AVX2-NEXT:    retq
937;
938; AVX512-LABEL: test_v32i8_muti_uses:
939; AVX512:       # %bb.0:
940; AVX512-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
941; AVX512-NEXT:    kortestd %k0, %k0
942; AVX512-NEXT:    kmovd %k0, %ecx
943; AVX512-NEXT:    movl $16, %eax
944; AVX512-NEXT:    cmovael %ecx, %eax
945; AVX512-NEXT:    vzeroupper
946; AVX512-NEXT:    retq
947  %a = icmp eq <32 x i8> %x, %y
948  %b = bitcast <32 x i1> %a to i32
949  %c = icmp eq i32 %b, -1
950  %res = select i1 %c, i32 16, i32 %b
951  ret i32 %res
952}
953
; All-of reduction performed directly on the <2 x i1> result of an ordered
; greater-than compare of two <2 x double> vectors: lane 1 is shuffled onto
; lane 0, and'ed with the original mask, and lane 0 is returned as i1.
; Expected lowering: movmskpd and a compare with 3 on the SSE runs, vtestpd
; against a register compared equal with itself plus setb on AVX1/AVX2, and a
; k-register compare followed by cmpb $3 on the AVX512 run.
954define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
955; SSE-LABEL: bool_reduction_v2f64:
956; SSE:       # %bb.0:
957; SSE-NEXT:    cmpltpd %xmm0, %xmm1
958; SSE-NEXT:    movmskpd %xmm1, %eax
959; SSE-NEXT:    cmpl $3, %eax
960; SSE-NEXT:    sete %al
961; SSE-NEXT:    retq
962;
963; AVX1OR2-LABEL: bool_reduction_v2f64:
964; AVX1OR2:       # %bb.0:
965; AVX1OR2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
966; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
967; AVX1OR2-NEXT:    vtestpd %xmm1, %xmm0
968; AVX1OR2-NEXT:    setb %al
969; AVX1OR2-NEXT:    retq
970;
971; AVX512-LABEL: bool_reduction_v2f64:
972; AVX512:       # %bb.0:
973; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
974; AVX512-NEXT:    kmovd %k0, %eax
975; AVX512-NEXT:    cmpb $3, %al
976; AVX512-NEXT:    sete %al
977; AVX512-NEXT:    retq
978  %a = fcmp ogt <2 x double> %x, %y
979  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
980  %c = and <2 x i1> %a, %b
981  %d = extractelement <2 x i1> %c, i32 0
982  ret i1 %d
983}
984
; All-of reduction on the <4 x i1> result of an ordered-equal compare of two
; <4 x float> vectors, using two shuffle+and steps (2 then 1 live lanes) and
; returning lane 0 as i1.
; Expected lowering: movmskps and a compare with 15 on the SSE runs, vtestps
; plus setb on AVX1/AVX2, and a k-register compare followed by cmpb $15 on
; the AVX512 run.
985define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
986; SSE-LABEL: bool_reduction_v4f32:
987; SSE:       # %bb.0:
988; SSE-NEXT:    cmpeqps %xmm1, %xmm0
989; SSE-NEXT:    movmskps %xmm0, %eax
990; SSE-NEXT:    cmpl $15, %eax
991; SSE-NEXT:    sete %al
992; SSE-NEXT:    retq
993;
994; AVX1OR2-LABEL: bool_reduction_v4f32:
995; AVX1OR2:       # %bb.0:
996; AVX1OR2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
997; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
998; AVX1OR2-NEXT:    vtestps %xmm1, %xmm0
999; AVX1OR2-NEXT:    setb %al
1000; AVX1OR2-NEXT:    retq
1001;
1002; AVX512-LABEL: bool_reduction_v4f32:
1003; AVX512:       # %bb.0:
1004; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
1005; AVX512-NEXT:    kmovd %k0, %eax
1006; AVX512-NEXT:    cmpb $15, %al
1007; AVX512-NEXT:    sete %al
1008; AVX512-NEXT:    retq
1009  %a = fcmp oeq <4 x float> %x, %y
1010  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1011  %b = and <4 x i1> %s1, %a
1012  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1013  %c = and <4 x i1> %s2, %b
1014  %d = extractelement <4 x i1> %c, i32 0
1015  ret i1 %d
1016}
1017
; All-of reduction on the <4 x i1> result of an ordered greater-or-equal
; compare of two <4 x double> vectors (two shuffle+and steps, lane 0 returned).
; The SSE runs expect two 128-bit cmplepd results merged with shufps and
; tested through movmskps against 15; AVX1/AVX2 expect a 256-bit compare and
; vtestpd with setb; the AVX512 run expects a k-register compare and cmpb $15.
1018define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
1019; SSE-LABEL: bool_reduction_v4f64:
1020; SSE:       # %bb.0:
1021; SSE-NEXT:    cmplepd %xmm1, %xmm3
1022; SSE-NEXT:    cmplepd %xmm0, %xmm2
1023; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
1024; SSE-NEXT:    movmskps %xmm2, %eax
1025; SSE-NEXT:    cmpl $15, %eax
1026; SSE-NEXT:    sete %al
1027; SSE-NEXT:    retq
1028;
1029; AVX1-LABEL: bool_reduction_v4f64:
1030; AVX1:       # %bb.0:
1031; AVX1-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
1032; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1033; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1034; AVX1-NEXT:    vtestpd %ymm1, %ymm0
1035; AVX1-NEXT:    setb %al
1036; AVX1-NEXT:    vzeroupper
1037; AVX1-NEXT:    retq
1038;
1039; AVX2-LABEL: bool_reduction_v4f64:
1040; AVX2:       # %bb.0:
1041; AVX2-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
1042; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1043; AVX2-NEXT:    vtestpd %ymm1, %ymm0
1044; AVX2-NEXT:    setb %al
1045; AVX2-NEXT:    vzeroupper
1046; AVX2-NEXT:    retq
1047;
1048; AVX512-LABEL: bool_reduction_v4f64:
1049; AVX512:       # %bb.0:
1050; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k0
1051; AVX512-NEXT:    kmovd %k0, %eax
1052; AVX512-NEXT:    cmpb $15, %al
1053; AVX512-NEXT:    sete %al
1054; AVX512-NEXT:    vzeroupper
1055; AVX512-NEXT:    retq
1056  %a = fcmp oge <4 x double> %x, %y
1057  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1058  %b = and <4 x i1> %s1, %a
1059  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1060  %c = and <4 x i1> %s2, %b
1061  %d = extractelement <4 x i1> %c, i32 0
1062  ret i1 %d
1063}
1064
; All-of reduction on the <8 x i1> result of an unordered-or-not-equal
; compare of two <8 x float> vectors (three shuffle+and steps: 4, 2, 1 live
; lanes; lane 0 returned as i1).
; The SSE runs expect two 128-bit compares packed down to bytes and an 8-bit
; mask test against -1; AVX1/AVX2 expect a 256-bit compare and vtestps with
; setb; the AVX512 run expects a k-register compare and cmpb $-1.
1065define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
1066; SSE-LABEL: bool_reduction_v8f32:
1067; SSE:       # %bb.0:
1068; SSE-NEXT:    cmpneqps %xmm3, %xmm1
1069; SSE-NEXT:    cmpneqps %xmm2, %xmm0
1070; SSE-NEXT:    packssdw %xmm1, %xmm0
1071; SSE-NEXT:    packsswb %xmm0, %xmm0
1072; SSE-NEXT:    pmovmskb %xmm0, %eax
1073; SSE-NEXT:    cmpb $-1, %al
1074; SSE-NEXT:    sete %al
1075; SSE-NEXT:    retq
1076;
1077; AVX1-LABEL: bool_reduction_v8f32:
1078; AVX1:       # %bb.0:
1079; AVX1-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
1080; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1081; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1082; AVX1-NEXT:    vtestps %ymm1, %ymm0
1083; AVX1-NEXT:    setb %al
1084; AVX1-NEXT:    vzeroupper
1085; AVX1-NEXT:    retq
1086;
1087; AVX2-LABEL: bool_reduction_v8f32:
1088; AVX2:       # %bb.0:
1089; AVX2-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
1090; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1091; AVX2-NEXT:    vtestps %ymm1, %ymm0
1092; AVX2-NEXT:    setb %al
1093; AVX2-NEXT:    vzeroupper
1094; AVX2-NEXT:    retq
1095;
1096; AVX512-LABEL: bool_reduction_v8f32:
1097; AVX512:       # %bb.0:
1098; AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k0
1099; AVX512-NEXT:    kmovd %k0, %eax
1100; AVX512-NEXT:    cmpb $-1, %al
1101; AVX512-NEXT:    sete %al
1102; AVX512-NEXT:    vzeroupper
1103; AVX512-NEXT:    retq
1104  %a = fcmp une <8 x float> %x, %y
1105  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1106  %b = and <8 x i1> %s1, %a
1107  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1108  %c = and <8 x i1> %s2, %b
1109  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1110  %d = and <8 x i1> %s3, %c
1111  %e = extractelement <8 x i1> %d, i32 0
1112  ret i1 %e
1113}
1114
; All-of reduction on the <2 x i1> result of a not-equal compare of two
; <2 x i64> vectors (one shuffle+and step, lane 0 returned as i1).
; The SSE and AVX1/AVX2 runs expect the inverse formulation: an equality
; compare followed by a test that no lane matched (zero mask, sete). The
; SSE2 run builds the 64-bit equality from pcmpeqd plus a pshufd/pand pair.
; The AVX512 run expects a direct not-equal k-register compare and cmpb $3.
1115define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
1116; SSE2-LABEL: bool_reduction_v2i64:
1117; SSE2:       # %bb.0:
1118; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1119; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1120; SSE2-NEXT:    pand %xmm0, %xmm1
1121; SSE2-NEXT:    movmskpd %xmm1, %eax
1122; SSE2-NEXT:    testl %eax, %eax
1123; SSE2-NEXT:    sete %al
1124; SSE2-NEXT:    retq
1125;
1126; SSE42-LABEL: bool_reduction_v2i64:
1127; SSE42:       # %bb.0:
1128; SSE42-NEXT:    pcmpeqq %xmm1, %xmm0
1129; SSE42-NEXT:    movmskpd %xmm0, %eax
1130; SSE42-NEXT:    testl %eax, %eax
1131; SSE42-NEXT:    sete %al
1132; SSE42-NEXT:    retq
1133;
1134; AVX1OR2-LABEL: bool_reduction_v2i64:
1135; AVX1OR2:       # %bb.0:
1136; AVX1OR2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
1137; AVX1OR2-NEXT:    vtestpd %xmm0, %xmm0
1138; AVX1OR2-NEXT:    sete %al
1139; AVX1OR2-NEXT:    retq
1140;
1141; AVX512-LABEL: bool_reduction_v2i64:
1142; AVX512:       # %bb.0:
1143; AVX512-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0
1144; AVX512-NEXT:    kmovd %k0, %eax
1145; AVX512-NEXT:    cmpb $3, %al
1146; AVX512-NEXT:    sete %al
1147; AVX512-NEXT:    retq
1148  %a = icmp ne <2 x i64> %x, %y
1149  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1150  %c = and <2 x i1> %a, %b
1151  %d = extractelement <2 x i1> %c, i32 0
1152  ret i1 %d
1153}
1154
; All-of reduction on the <4 x i1> result of an unsigned greater-than compare
; of two <4 x i32> vectors (two shuffle+and steps, lane 0 returned as i1).
; The SSE2 run expects both inputs to be xor'ed with 0x80000000 per lane so
; that the signed pcmpgtd can be used, then a mask test against 15. The
; SSE4.2 and AVX1/AVX2 runs expect pminud + pcmpeqd and a test that the
; resulting mask is zero. The AVX512 run expects vpcmpnleud into a k-register
; and cmpb $15.
1155define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1156; SSE2-LABEL: bool_reduction_v4i32:
1157; SSE2:       # %bb.0:
1158; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1159; SSE2-NEXT:    pxor %xmm2, %xmm1
1160; SSE2-NEXT:    pxor %xmm2, %xmm0
1161; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
1162; SSE2-NEXT:    movmskps %xmm0, %eax
1163; SSE2-NEXT:    cmpl $15, %eax
1164; SSE2-NEXT:    sete %al
1165; SSE2-NEXT:    retq
1166;
1167; SSE42-LABEL: bool_reduction_v4i32:
1168; SSE42:       # %bb.0:
1169; SSE42-NEXT:    pminud %xmm0, %xmm1
1170; SSE42-NEXT:    pcmpeqd %xmm0, %xmm1
1171; SSE42-NEXT:    movmskps %xmm1, %eax
1172; SSE42-NEXT:    testl %eax, %eax
1173; SSE42-NEXT:    sete %al
1174; SSE42-NEXT:    retq
1175;
1176; AVX1OR2-LABEL: bool_reduction_v4i32:
1177; AVX1OR2:       # %bb.0:
1178; AVX1OR2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
1179; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1180; AVX1OR2-NEXT:    vtestps %xmm0, %xmm0
1181; AVX1OR2-NEXT:    sete %al
1182; AVX1OR2-NEXT:    retq
1183;
1184; AVX512-LABEL: bool_reduction_v4i32:
1185; AVX512:       # %bb.0:
1186; AVX512-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
1187; AVX512-NEXT:    kmovd %k0, %eax
1188; AVX512-NEXT:    cmpb $15, %al
1189; AVX512-NEXT:    sete %al
1190; AVX512-NEXT:    retq
1191  %a = icmp ugt <4 x i32> %x, %y
1192  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1193  %b = and <4 x i1> %s1, %a
1194  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1195  %c = and <4 x i1> %s2, %b
1196  %d = extractelement <4 x i1> %c, i32 0
1197  ret i1 %d
1198}
1199
; All-of reduction on the <8 x i1> result of a signed less-than compare of
; two <8 x i16> vectors (three shuffle+and steps, lane 0 returned as i1).
; The SSE and AVX1/AVX2 runs expect pcmpgtw with the operands swapped, a pack
; to bytes, a byte movmsk and an 8-bit compare with -1; the AVX512 run
; expects a k-register compare followed by cmpb $-1.
1200define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1201; SSE-LABEL: bool_reduction_v8i16:
1202; SSE:       # %bb.0:
1203; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
1204; SSE-NEXT:    packsswb %xmm1, %xmm1
1205; SSE-NEXT:    pmovmskb %xmm1, %eax
1206; SSE-NEXT:    cmpb $-1, %al
1207; SSE-NEXT:    sete %al
1208; SSE-NEXT:    retq
1209;
1210; AVX1OR2-LABEL: bool_reduction_v8i16:
1211; AVX1OR2:       # %bb.0:
1212; AVX1OR2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
1213; AVX1OR2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1214; AVX1OR2-NEXT:    vpmovmskb %xmm0, %eax
1215; AVX1OR2-NEXT:    cmpb $-1, %al
1216; AVX1OR2-NEXT:    sete %al
1217; AVX1OR2-NEXT:    retq
1218;
1219; AVX512-LABEL: bool_reduction_v8i16:
1220; AVX512:       # %bb.0:
1221; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0
1222; AVX512-NEXT:    kmovd %k0, %eax
1223; AVX512-NEXT:    cmpb $-1, %al
1224; AVX512-NEXT:    sete %al
1225; AVX512-NEXT:    retq
1226  %a = icmp slt <8 x i16> %x, %y
1227  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1228  %b = and <8 x i1> %s1, %a
1229  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1230  %c = and <8 x i1> %s2, %b
1231  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1232  %d = and <8 x i1> %s3, %c
1233  %e = extractelement <8 x i1> %d, i32 0
1234  ret i1 %e
1235}
1236
; All-of reduction on the <16 x i1> result of a signed greater-than compare
; of two <16 x i8> vectors (four shuffle+and steps: 8, 4, 2, 1 live lanes;
; lane 0 returned as i1).
; The SSE and AVX1/AVX2 runs expect a byte compare, a byte movmsk and an
; equality test against 0xFFFF; the AVX512 run expects a k-register compare
; tested with kortestw and read back with setb.
1237define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1238; SSE-LABEL: bool_reduction_v16i8:
1239; SSE:       # %bb.0:
1240; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
1241; SSE-NEXT:    pmovmskb %xmm0, %eax
1242; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
1243; SSE-NEXT:    sete %al
1244; SSE-NEXT:    retq
1245;
1246; AVX1OR2-LABEL: bool_reduction_v16i8:
1247; AVX1OR2:       # %bb.0:
1248; AVX1OR2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
1249; AVX1OR2-NEXT:    vpmovmskb %xmm0, %eax
1250; AVX1OR2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
1251; AVX1OR2-NEXT:    sete %al
1252; AVX1OR2-NEXT:    retq
1253;
1254; AVX512-LABEL: bool_reduction_v16i8:
1255; AVX512:       # %bb.0:
1256; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
1257; AVX512-NEXT:    kortestw %k0, %k0
1258; AVX512-NEXT:    setb %al
1259; AVX512-NEXT:    retq
1260  %a = icmp sgt <16 x i8> %x, %y
1261  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1262  %b = and <16 x i1> %s1, %a
1263  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1264  %c = and <16 x i1> %s2, %b
1265  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1266  %d = and <16 x i1> %s3, %c
1267  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1268  %e = and <16 x i1> %s4, %d
1269  %f = extractelement <16 x i1> %e, i32 0
1270  ret i1 %f
1271}
1272
; All-of reduction on the <4 x i1> result of a signed less-than compare of
; two <4 x i64> vectors (two shuffle+and steps, lane 0 returned as i1).
; The SSE2 run expects the 64-bit compare to be assembled from 32-bit
; pcmpgtd/pcmpeqd results recombined with shufps/andps/orps. The SSE4.2 run
; expects two pcmpgtq results packed together and tested through movmskps.
; AVX1 splits the inputs into 128-bit halves and and's the two compares;
; AVX2 uses one 256-bit compare; both finish with vtestpd and setb. The
; AVX512 run expects a k-register compare and cmpb $15.
1273define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1274; SSE2-LABEL: bool_reduction_v4i64:
1275; SSE2:       # %bb.0:
1276; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1277; SSE2-NEXT:    pxor %xmm4, %xmm1
1278; SSE2-NEXT:    pxor %xmm4, %xmm3
1279; SSE2-NEXT:    movdqa %xmm3, %xmm5
1280; SSE2-NEXT:    pcmpgtd %xmm1, %xmm5
1281; SSE2-NEXT:    pxor %xmm4, %xmm0
1282; SSE2-NEXT:    pxor %xmm4, %xmm2
1283; SSE2-NEXT:    movdqa %xmm2, %xmm4
1284; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1285; SSE2-NEXT:    movdqa %xmm4, %xmm6
1286; SSE2-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
1287; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
1288; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
1289; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
1290; SSE2-NEXT:    andps %xmm6, %xmm2
1291; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
1292; SSE2-NEXT:    orps %xmm2, %xmm4
1293; SSE2-NEXT:    movmskps %xmm4, %eax
1294; SSE2-NEXT:    cmpl $15, %eax
1295; SSE2-NEXT:    sete %al
1296; SSE2-NEXT:    retq
1297;
1298; SSE42-LABEL: bool_reduction_v4i64:
1299; SSE42:       # %bb.0:
1300; SSE42-NEXT:    pcmpgtq %xmm1, %xmm3
1301; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
1302; SSE42-NEXT:    packssdw %xmm3, %xmm2
1303; SSE42-NEXT:    movmskps %xmm2, %eax
1304; SSE42-NEXT:    cmpl $15, %eax
1305; SSE42-NEXT:    sete %al
1306; SSE42-NEXT:    retq
1307;
1308; AVX1-LABEL: bool_reduction_v4i64:
1309; AVX1:       # %bb.0:
1310; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1311; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1312; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1313; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
1314; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
1315; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1316; AVX1-NEXT:    vtestpd %xmm1, %xmm0
1317; AVX1-NEXT:    setb %al
1318; AVX1-NEXT:    vzeroupper
1319; AVX1-NEXT:    retq
1320;
1321; AVX2-LABEL: bool_reduction_v4i64:
1322; AVX2:       # %bb.0:
1323; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
1324; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1325; AVX2-NEXT:    vtestpd %ymm1, %ymm0
1326; AVX2-NEXT:    setb %al
1327; AVX2-NEXT:    vzeroupper
1328; AVX2-NEXT:    retq
1329;
1330; AVX512-LABEL: bool_reduction_v4i64:
1331; AVX512:       # %bb.0:
1332; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
1333; AVX512-NEXT:    kmovd %k0, %eax
1334; AVX512-NEXT:    cmpb $15, %al
1335; AVX512-NEXT:    sete %al
1336; AVX512-NEXT:    vzeroupper
1337; AVX512-NEXT:    retq
1338  %a = icmp slt <4 x i64> %x, %y
1339  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1340  %b = and <4 x i1> %s1, %a
1341  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1342  %c = and <4 x i1> %s2, %b
1343  %d = extractelement <4 x i1> %c, i32 0
1344  ret i1 %d
1345}
1346
; All-of reduction on the <8 x i1> result of an unsigned less-or-equal
; compare of two <8 x i32> vectors (three shuffle+and steps, lane 0 returned).
; The SSE2 run expects sign-bit-flipped inputs fed to signed pcmpgtd, packed
; to bytes, with the movmsk result inverted (notl) before the 8-bit compare
; with -1. The SSE4.2 run expects pminud + pcmpeqd per half, packed and
; compared with -1. AVX1/AVX2 expect pminud xor'ed with the first operand and
; a vptest for an all-zero result. The AVX512 run expects vpcmpleud into a
; k-register and cmpb $-1.
1347define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1348; SSE2-LABEL: bool_reduction_v8i32:
1349; SSE2:       # %bb.0:
1350; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1351; SSE2-NEXT:    pxor %xmm4, %xmm3
1352; SSE2-NEXT:    pxor %xmm4, %xmm1
1353; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
1354; SSE2-NEXT:    pxor %xmm4, %xmm2
1355; SSE2-NEXT:    pxor %xmm4, %xmm0
1356; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
1357; SSE2-NEXT:    packssdw %xmm1, %xmm0
1358; SSE2-NEXT:    packsswb %xmm0, %xmm0
1359; SSE2-NEXT:    pmovmskb %xmm0, %eax
1360; SSE2-NEXT:    notl %eax
1361; SSE2-NEXT:    cmpb $-1, %al
1362; SSE2-NEXT:    sete %al
1363; SSE2-NEXT:    retq
1364;
1365; SSE42-LABEL: bool_reduction_v8i32:
1366; SSE42:       # %bb.0:
1367; SSE42-NEXT:    pminud %xmm1, %xmm3
1368; SSE42-NEXT:    pcmpeqd %xmm1, %xmm3
1369; SSE42-NEXT:    pminud %xmm0, %xmm2
1370; SSE42-NEXT:    pcmpeqd %xmm0, %xmm2
1371; SSE42-NEXT:    packssdw %xmm3, %xmm2
1372; SSE42-NEXT:    packsswb %xmm2, %xmm2
1373; SSE42-NEXT:    pmovmskb %xmm2, %eax
1374; SSE42-NEXT:    cmpb $-1, %al
1375; SSE42-NEXT:    sete %al
1376; SSE42-NEXT:    retq
1377;
1378; AVX1-LABEL: bool_reduction_v8i32:
1379; AVX1:       # %bb.0:
1380; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1381; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1382; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
1383; AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm2
1384; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm1
1385; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1386; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1387; AVX1-NEXT:    vptest %xmm0, %xmm0
1388; AVX1-NEXT:    sete %al
1389; AVX1-NEXT:    vzeroupper
1390; AVX1-NEXT:    retq
1391;
1392; AVX2-LABEL: bool_reduction_v8i32:
1393; AVX2:       # %bb.0:
1394; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
1395; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1396; AVX2-NEXT:    vptest %ymm0, %ymm0
1397; AVX2-NEXT:    sete %al
1398; AVX2-NEXT:    vzeroupper
1399; AVX2-NEXT:    retq
1400;
1401; AVX512-LABEL: bool_reduction_v8i32:
1402; AVX512:       # %bb.0:
1403; AVX512-NEXT:    vpcmpleud %ymm1, %ymm0, %k0
1404; AVX512-NEXT:    kmovd %k0, %eax
1405; AVX512-NEXT:    cmpb $-1, %al
1406; AVX512-NEXT:    sete %al
1407; AVX512-NEXT:    vzeroupper
1408; AVX512-NEXT:    retq
1409  %a = icmp ule <8 x i32> %x, %y
1410  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1411  %b = and <8 x i1> %s1, %a
1412  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1413  %c = and <8 x i1> %s2, %b
1414  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1415  %d = and <8 x i1> %s3, %c
1416  %e = extractelement <8 x i1> %d, i32 0
1417  ret i1 %e
1418}
1419
; All-of reduction on the <16 x i1> result of an equality compare of two
; <16 x i16> vectors (four shuffle+and steps, lane 0 returned as i1).
; The SSE2 run expects byte-granular pcmpeqb on both halves, an and, a byte
; movmsk and an xor with 0xFFFF feeding sete. All other runs expect the
; inputs to be xor'ed together and the result tested for all-zero with
; ptest/vptest.
1420define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1421; SSE2-LABEL: bool_reduction_v16i16:
1422; SSE2:       # %bb.0:
1423; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
1424; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
1425; SSE2-NEXT:    pand %xmm1, %xmm0
1426; SSE2-NEXT:    pmovmskb %xmm0, %eax
1427; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1428; SSE2-NEXT:    sete %al
1429; SSE2-NEXT:    retq
1430;
1431; SSE42-LABEL: bool_reduction_v16i16:
1432; SSE42:       # %bb.0:
1433; SSE42-NEXT:    pxor %xmm3, %xmm1
1434; SSE42-NEXT:    pxor %xmm2, %xmm0
1435; SSE42-NEXT:    por %xmm1, %xmm0
1436; SSE42-NEXT:    ptest %xmm0, %xmm0
1437; SSE42-NEXT:    sete %al
1438; SSE42-NEXT:    retq
1439;
1440; AVX1-LABEL: bool_reduction_v16i16:
1441; AVX1:       # %bb.0:
1442; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
1443; AVX1-NEXT:    vptest %ymm0, %ymm0
1444; AVX1-NEXT:    sete %al
1445; AVX1-NEXT:    vzeroupper
1446; AVX1-NEXT:    retq
1447;
1448; AVX2-LABEL: bool_reduction_v16i16:
1449; AVX2:       # %bb.0:
1450; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1451; AVX2-NEXT:    vptest %ymm0, %ymm0
1452; AVX2-NEXT:    sete %al
1453; AVX2-NEXT:    vzeroupper
1454; AVX2-NEXT:    retq
1455;
1456; AVX512-LABEL: bool_reduction_v16i16:
1457; AVX512:       # %bb.0:
1458; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1459; AVX512-NEXT:    vptest %ymm0, %ymm0
1460; AVX512-NEXT:    sete %al
1461; AVX512-NEXT:    vzeroupper
1462; AVX512-NEXT:    retq
1463  %a = icmp eq <16 x i16> %x, %y
1464  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1465  %b = and <16 x i1> %s1, %a
1466  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1467  %c = and <16 x i1> %s2, %b
1468  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1469  %d = and <16 x i1> %s3, %c
1470  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1471  %e = and <16 x i1> %s4, %d
1472  %f = extractelement <16 x i1> %e, i32 0
1473  ret i1 %f
1474}
1475
1476define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1477; SSE2-LABEL: bool_reduction_v32i8:
1478; SSE2:       # %bb.0:
1479; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
1480; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
1481; SSE2-NEXT:    pand %xmm1, %xmm0
1482; SSE2-NEXT:    pmovmskb %xmm0, %eax
1483; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1484; SSE2-NEXT:    sete %al
1485; SSE2-NEXT:    retq
1486;
1487; SSE42-LABEL: bool_reduction_v32i8:
1488; SSE42:       # %bb.0:
1489; SSE42-NEXT:    pxor %xmm3, %xmm1
1490; SSE42-NEXT:    pxor %xmm2, %xmm0
1491; SSE42-NEXT:    por %xmm1, %xmm0
1492; SSE42-NEXT:    ptest %xmm0, %xmm0
1493; SSE42-NEXT:    sete %al
1494; SSE42-NEXT:    retq
1495;
1496; AVX1-LABEL: bool_reduction_v32i8:
1497; AVX1:       # %bb.0:
1498; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
1499; AVX1-NEXT:    vptest %ymm0, %ymm0
1500; AVX1-NEXT:    sete %al
1501; AVX1-NEXT:    vzeroupper
1502; AVX1-NEXT:    retq
1503;
1504; AVX2-LABEL: bool_reduction_v32i8:
1505; AVX2:       # %bb.0:
1506; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1507; AVX2-NEXT:    vptest %ymm0, %ymm0
1508; AVX2-NEXT:    sete %al
1509; AVX2-NEXT:    vzeroupper
1510; AVX2-NEXT:    retq
1511;
1512; AVX512-LABEL: bool_reduction_v32i8:
1513; AVX512:       # %bb.0:
1514; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1515; AVX512-NEXT:    vptest %ymm0, %ymm0
1516; AVX512-NEXT:    sete %al
1517; AVX512-NEXT:    vzeroupper
1518; AVX512-NEXT:    retq
1519  %a = icmp eq <32 x i8> %x, %y
1520  %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1521  %b = and <32 x i1> %s1, %a
1522  %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1523  %c = and <32 x i1> %s2, %b
1524  %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1525  %d = and <32 x i1> %s3, %c
1526  %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1527  %e = and <32 x i1> %s4, %d
1528  %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1529  %f = and <32 x i1> %s5, %e
1530  %g = extractelement <32 x i1> %f, i32 0
1531  ret i1 %g
1532}
1533
1534; PR59867
; Two-lane "all equal" reduction: returns true only when both bytes loaded from
; %s0 match the corresponding bytes loaded from %s1. The two compare lanes are
; combined with a select (false arm is the constant false) rather than an `and`.
;
; The assertion lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with that script instead of editing them by hand.
; Summary of the expected lowerings: the SSE and AVX1/AVX2 runs widen the byte
; compare result to two 64-bit lanes and test both sign bits (movmskpd + cmpl $3,
; or vtestpd against an all-ones register), while the AVX512 run compares into a
; mask register, inverts it, and checks that the low two bits are clear.
1535define i1 @select_v2i8(ptr %s0, ptr %s1) {
1536; SSE2-LABEL: select_v2i8:
1537; SSE2:       # %bb.0:
1538; SSE2-NEXT:    movzwl (%rdi), %eax
1539; SSE2-NEXT:    movd %eax, %xmm0
1540; SSE2-NEXT:    movzwl (%rsi), %eax
1541; SSE2-NEXT:    movd %eax, %xmm1
1542; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1543; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1544; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
1545; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1546; SSE2-NEXT:    movmskpd %xmm0, %eax
1547; SSE2-NEXT:    cmpl $3, %eax
1548; SSE2-NEXT:    sete %al
1549; SSE2-NEXT:    retq
1550;
1551; SSE42-LABEL: select_v2i8:
1552; SSE42:       # %bb.0:
1553; SSE42-NEXT:    movzwl (%rdi), %eax
1554; SSE42-NEXT:    movd %eax, %xmm0
1555; SSE42-NEXT:    movzwl (%rsi), %eax
1556; SSE42-NEXT:    movd %eax, %xmm1
1557; SSE42-NEXT:    pcmpeqb %xmm0, %xmm1
1558; SSE42-NEXT:    pmovsxbq %xmm1, %xmm0
1559; SSE42-NEXT:    movmskpd %xmm0, %eax
1560; SSE42-NEXT:    cmpl $3, %eax
1561; SSE42-NEXT:    sete %al
1562; SSE42-NEXT:    retq
1563;
1564; AVX1OR2-LABEL: select_v2i8:
1565; AVX1OR2:       # %bb.0:
1566; AVX1OR2-NEXT:    movzwl (%rdi), %eax
1567; AVX1OR2-NEXT:    vmovd %eax, %xmm0
1568; AVX1OR2-NEXT:    movzwl (%rsi), %eax
1569; AVX1OR2-NEXT:    vmovd %eax, %xmm1
1570; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1571; AVX1OR2-NEXT:    vpmovsxbq %xmm0, %xmm0
1572; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1573; AVX1OR2-NEXT:    vtestpd %xmm1, %xmm0
1574; AVX1OR2-NEXT:    setb %al
1575; AVX1OR2-NEXT:    retq
1576;
1577; AVX512-LABEL: select_v2i8:
1578; AVX512:       # %bb.0:
1579; AVX512-NEXT:    movzwl (%rdi), %eax
1580; AVX512-NEXT:    vmovd %eax, %xmm0
1581; AVX512-NEXT:    movzwl (%rsi), %eax
1582; AVX512-NEXT:    vmovd %eax, %xmm1
1583; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
1584; AVX512-NEXT:    knotw %k0, %k0
1585; AVX512-NEXT:    kmovd %k0, %eax
1586; AVX512-NEXT:    testb $3, %al
1587; AVX512-NEXT:    sete %al
1588; AVX512-NEXT:    retq
  ; Both operands are 2-byte vectors loaded with byte alignment.
1589  %v0 = load <2 x i8>, ptr %s0, align 1
1590  %v1 = load <2 x i8>, ptr %s1, align 1
  ; Lane-wise equality, then pull each lane's result out as a scalar i1.
1591  %cmp = icmp eq <2 x i8> %v0, %v1
1592  %cmp0 = extractelement <2 x i1> %cmp, i32 0
1593  %cmp1 = extractelement <2 x i1> %cmp, i32 1
  ; Result is lane 1's compare when lane 0 matched, otherwise false.
1594  %res = select i1 %cmp0, i1 %cmp1, i1 false
1595  ret i1 %res
1596}
1597
; Returns true when no byte lane of %v differs from the matching lane of either
; %a or %b: the two lane-wise "not equal" masks are OR'd together, the 32-lane
; i1 result is bitcast to an i32, and that integer is compared against zero.
; NOTE(review): the function name presumably refers to the upstream issue this
; test guards against - confirm against the issue tracker.
;
; The assertion lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with that script instead of editing them by hand.
; Summary of the expected lowerings: the SSE and AVX1 runs work on 128-bit halves
; with byte equality compares, AND the results, and compare the pmovmskb mask
; with 0xFFFF; the AVX2 run XORs %v with each input, ORs the differences and
; uses vptest; the AVX512 run compares into two mask registers and uses kortestd.
1598define i1 @PR116977(<32 x i8> %a, <32 x i8> %b, <32 x i8> %v) {
1599; SSE-LABEL: PR116977:
1600; SSE:       # %bb.0:
1601; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
1602; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
1603; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
1604; SSE-NEXT:    pand %xmm0, %xmm2
1605; SSE-NEXT:    pcmpeqb %xmm5, %xmm3
1606; SSE-NEXT:    pand %xmm1, %xmm3
1607; SSE-NEXT:    pand %xmm2, %xmm3
1608; SSE-NEXT:    pmovmskb %xmm3, %eax
1609; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
1610; SSE-NEXT:    sete %al
1611; SSE-NEXT:    retq
1612;
1613; AVX1-LABEL: PR116977:
1614; AVX1:       # %bb.0:
1615; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm2, %xmm3
1616; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1617; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
1618; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm4, %xmm0
1619; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm2
1620; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
1621; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
1622; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm4, %xmm1
1623; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1624; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
1625; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1626; AVX1-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1627; AVX1-NEXT:    sete %al
1628; AVX1-NEXT:    vzeroupper
1629; AVX1-NEXT:    retq
1630;
1631; AVX2-LABEL: PR116977:
1632; AVX2:       # %bb.0:
1633; AVX2-NEXT:    vpxor %ymm0, %ymm2, %ymm0
1634; AVX2-NEXT:    vpxor %ymm1, %ymm2, %ymm1
1635; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1636; AVX2-NEXT:    vptest %ymm0, %ymm0
1637; AVX2-NEXT:    sete %al
1638; AVX2-NEXT:    vzeroupper
1639; AVX2-NEXT:    retq
1640;
1641; AVX512-LABEL: PR116977:
1642; AVX512:       # %bb.0:
1643; AVX512-NEXT:    vpcmpneqb %ymm0, %ymm2, %k0
1644; AVX512-NEXT:    vpcmpneqb %ymm1, %ymm2, %k1
1645; AVX512-NEXT:    kortestd %k1, %k0
1646; AVX512-NEXT:    sete %al
1647; AVX512-NEXT:    vzeroupper
1648; AVX512-NEXT:    retq
  ; Per-lane mismatch masks of %v against each of the two inputs.
1649  %ca = icmp ne <32 x i8> %v, %a
1650  %cb = icmp ne <32 x i8> %v, %b
  ; A lane is set when %v mismatches %a or %b in that position.
1651  %or = or <32 x i1> %ca, %cb
  ; Collapse the 32 lane bits into a scalar; zero means no lane mismatched.
1652  %scl = bitcast <32 x i1> %or to i32
1653  %cmp = icmp eq i32 %scl, 0
1654  ret i1 %cmp
1655}
1656