xref: /llvm-project/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll (revision 69ffa7be3bda5547d7a41233f86b88539616e386)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
10
11;
12; Truncate
13;
14
; Truncates each lane of the <2 x i64> argument to i1 and XOR-reduces the two
; resulting bits to a single i1 via llvm.vector.reduce.xor.v2i1.
; Every expected sequence shifts the low bit of each lane into the sign
; position, copies the lane bits into a scalar mask, and reads the answer off
; the parity flag with setnp. The AVX512F and AVX512BW sequences test against
; $3 first, presumably because their zmm-wide vptestmq also yields mask bits
; for lanes beyond the two valid ones (TODO confirm).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
15define i1 @trunc_v2i64_v2i1(<2 x i64>) nounwind {
16; SSE-LABEL: trunc_v2i64_v2i1:
17; SSE:       # %bb.0:
18; SSE-NEXT:    psllq $63, %xmm0
19; SSE-NEXT:    movmskpd %xmm0, %eax
20; SSE-NEXT:    testb %al, %al
21; SSE-NEXT:    setnp %al
22; SSE-NEXT:    ret{{[l|q]}}
23;
24; AVX-LABEL: trunc_v2i64_v2i1:
25; AVX:       # %bb.0:
26; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
27; AVX-NEXT:    vmovmskpd %xmm0, %eax
28; AVX-NEXT:    testb %al, %al
29; AVX-NEXT:    setnp %al
30; AVX-NEXT:    retq
31;
32; AVX512F-LABEL: trunc_v2i64_v2i1:
33; AVX512F:       # %bb.0:
34; AVX512F-NEXT:    vpsllq $63, %xmm0, %xmm0
35; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
36; AVX512F-NEXT:    kmovw %k0, %eax
37; AVX512F-NEXT:    testb $3, %al
38; AVX512F-NEXT:    setnp %al
39; AVX512F-NEXT:    vzeroupper
40; AVX512F-NEXT:    retq
41;
42; AVX512BW-LABEL: trunc_v2i64_v2i1:
43; AVX512BW:       # %bb.0:
44; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
45; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
46; AVX512BW-NEXT:    kmovd %k0, %eax
47; AVX512BW-NEXT:    testb $3, %al
48; AVX512BW-NEXT:    setnp %al
49; AVX512BW-NEXT:    vzeroupper
50; AVX512BW-NEXT:    retq
51;
52; AVX512VL-LABEL: trunc_v2i64_v2i1:
53; AVX512VL:       # %bb.0:
54; AVX512VL-NEXT:    vpsllq $63, %xmm0, %xmm0
55; AVX512VL-NEXT:    vptestmq %xmm0, %xmm0, %k0
56; AVX512VL-NEXT:    kmovd %k0, %eax
57; AVX512VL-NEXT:    testb %al, %al
58; AVX512VL-NEXT:    setnp %al
59; AVX512VL-NEXT:    retq
60  %a = trunc <2 x i64> %0 to <2 x i1>
61  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
62  ret i1 %b
63}
64
; Truncates each lane of the <4 x i32> argument to i1 and XOR-reduces the four
; resulting bits to a single i1 via llvm.vector.reduce.xor.v4i1.
; The expected sequences shift the low bit of every dword into the sign
; position, gather the lane bits into a scalar mask (movmskps or a k-register
; copied to eax), and read the answer off the parity flag with setnp.
; The AVX512F and AVX512BW sequences test against $15 first, presumably to
; discard mask bits of lanes beyond the four valid ones produced by the
; zmm-wide vptestmd (TODO confirm).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
65define i1 @trunc_v4i32_v4i1(<4 x i32>) nounwind {
66; SSE-LABEL: trunc_v4i32_v4i1:
67; SSE:       # %bb.0:
68; SSE-NEXT:    pslld $31, %xmm0
69; SSE-NEXT:    movmskps %xmm0, %eax
70; SSE-NEXT:    testb %al, %al
71; SSE-NEXT:    setnp %al
72; SSE-NEXT:    ret{{[l|q]}}
73;
74; AVX-LABEL: trunc_v4i32_v4i1:
75; AVX:       # %bb.0:
76; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
77; AVX-NEXT:    vmovmskps %xmm0, %eax
78; AVX-NEXT:    testb %al, %al
79; AVX-NEXT:    setnp %al
80; AVX-NEXT:    retq
81;
82; AVX512F-LABEL: trunc_v4i32_v4i1:
83; AVX512F:       # %bb.0:
84; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
85; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
86; AVX512F-NEXT:    kmovw %k0, %eax
87; AVX512F-NEXT:    testb $15, %al
88; AVX512F-NEXT:    setnp %al
89; AVX512F-NEXT:    vzeroupper
90; AVX512F-NEXT:    retq
91;
92; AVX512BW-LABEL: trunc_v4i32_v4i1:
93; AVX512BW:       # %bb.0:
94; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
95; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
96; AVX512BW-NEXT:    kmovd %k0, %eax
97; AVX512BW-NEXT:    testb $15, %al
98; AVX512BW-NEXT:    setnp %al
99; AVX512BW-NEXT:    vzeroupper
100; AVX512BW-NEXT:    retq
101;
102; AVX512VL-LABEL: trunc_v4i32_v4i1:
103; AVX512VL:       # %bb.0:
104; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
105; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
106; AVX512VL-NEXT:    kmovd %k0, %eax
107; AVX512VL-NEXT:    testb %al, %al
108; AVX512VL-NEXT:    setnp %al
109; AVX512VL-NEXT:    retq
110  %a = trunc <4 x i32> %0 to <4 x i1>
111  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
112  ret i1 %b
113}
114
; Truncates each lane of the <8 x i16> argument to i1 and XOR-reduces the
; eight resulting bits to a single i1 via llvm.vector.reduce.xor.v8i1.
; The eight lane bits fit in one byte, so every expected sequence ends with
; testb %al, %al followed by setnp (result taken from the parity flag).
; How the byte-wide mask is built differs per configuration: the SSE and AVX
; sequences pack the words to bytes (packsswb) before pmovmskb, the AVX512F
; sequence sign-extends to qwords (vpmovsxwq) and uses vptestmq, and the
; AVX512BW / AVX512VL sequences use vpmovw2m.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
115define i1 @trunc_v8i16_v8i1(<8 x i16>) nounwind {
116; SSE-LABEL: trunc_v8i16_v8i1:
117; SSE:       # %bb.0:
118; SSE-NEXT:    psllw $15, %xmm0
119; SSE-NEXT:    packsswb %xmm0, %xmm0
120; SSE-NEXT:    pmovmskb %xmm0, %eax
121; SSE-NEXT:    testb %al, %al
122; SSE-NEXT:    setnp %al
123; SSE-NEXT:    ret{{[l|q]}}
124;
125; AVX-LABEL: trunc_v8i16_v8i1:
126; AVX:       # %bb.0:
127; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
128; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
129; AVX-NEXT:    vpmovmskb %xmm0, %eax
130; AVX-NEXT:    testb %al, %al
131; AVX-NEXT:    setnp %al
132; AVX-NEXT:    retq
133;
134; AVX512F-LABEL: trunc_v8i16_v8i1:
135; AVX512F:       # %bb.0:
136; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
137; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
138; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
139; AVX512F-NEXT:    kmovw %k0, %eax
140; AVX512F-NEXT:    testb %al, %al
141; AVX512F-NEXT:    setnp %al
142; AVX512F-NEXT:    vzeroupper
143; AVX512F-NEXT:    retq
144;
145; AVX512BW-LABEL: trunc_v8i16_v8i1:
146; AVX512BW:       # %bb.0:
147; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
148; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
149; AVX512BW-NEXT:    kmovd %k0, %eax
150; AVX512BW-NEXT:    testb %al, %al
151; AVX512BW-NEXT:    setnp %al
152; AVX512BW-NEXT:    vzeroupper
153; AVX512BW-NEXT:    retq
154;
155; AVX512VL-LABEL: trunc_v8i16_v8i1:
156; AVX512VL:       # %bb.0:
157; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
158; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
159; AVX512VL-NEXT:    kmovd %k0, %eax
160; AVX512VL-NEXT:    testb %al, %al
161; AVX512VL-NEXT:    setnp %al
162; AVX512VL-NEXT:    retq
163  %a = trunc <8 x i16> %0 to <8 x i1>
164  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
165  ret i1 %b
166}
167
; Truncates each lane of the <16 x i8> argument to i1 and XOR-reduces the
; sixteen resulting bits to a single i1 via llvm.vector.reduce.xor.v16i1.
; All configurations expect the same shape: shift the low bit of each byte
; into the sign position (psllw $7), collect a 16-bit mask with pmovmskb,
; fold its two bytes together with xorb %ah, %al, and take the result from
; the parity flag with setnp. The three AVX512 configurations produce
; identical output here and therefore share the common AVX512 prefix.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
168define i1 @trunc_v16i8_v16i1(<16 x i8>) nounwind {
169; SSE-LABEL: trunc_v16i8_v16i1:
170; SSE:       # %bb.0:
171; SSE-NEXT:    psllw $7, %xmm0
172; SSE-NEXT:    pmovmskb %xmm0, %eax
173; SSE-NEXT:    xorb %ah, %al
174; SSE-NEXT:    setnp %al
175; SSE-NEXT:    ret{{[l|q]}}
176;
177; AVX-LABEL: trunc_v16i8_v16i1:
178; AVX:       # %bb.0:
179; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
180; AVX-NEXT:    vpmovmskb %xmm0, %eax
181; AVX-NEXT:    xorb %ah, %al
182; AVX-NEXT:    setnp %al
183; AVX-NEXT:    retq
184;
185; AVX512-LABEL: trunc_v16i8_v16i1:
186; AVX512:       # %bb.0:
187; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
188; AVX512-NEXT:    vpmovmskb %xmm0, %eax
189; AVX512-NEXT:    xorb %ah, %al
190; AVX512-NEXT:    setnp %al
191; AVX512-NEXT:    retq
192  %a = trunc <16 x i8> %0 to <16 x i1>
193  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
194  ret i1 %b
195}
196
; Truncates each lane of the <4 x i64> argument to i1 and XOR-reduces the four
; resulting bits to a single i1 via llvm.vector.reduce.xor.v4i1.
; The SSE and AVX1 sequences first narrow the input to four dwords with a
; shufps of elements [0,2] from each half and then use pslld/movmskps; the
; AVX2 sequence works on the full ymm register with vpsllq/vmovmskpd.
; The AVX512F and AVX512BW sequences test the k-mask against $15, presumably
; to discard bits of lanes beyond the four valid ones produced by the
; zmm-wide vptestmq (TODO confirm). All sequences finish with setnp, i.e. the
; result is taken from the parity flag.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
197define i1 @trunc_v4i64_v4i1(<4 x i64>) nounwind {
198; SSE-LABEL: trunc_v4i64_v4i1:
199; SSE:       # %bb.0:
200; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
201; SSE-NEXT:    pslld $31, %xmm0
202; SSE-NEXT:    movmskps %xmm0, %eax
203; SSE-NEXT:    testb %al, %al
204; SSE-NEXT:    setnp %al
205; SSE-NEXT:    ret{{[l|q]}}
206;
207; AVX1-LABEL: trunc_v4i64_v4i1:
208; AVX1:       # %bb.0:
209; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
210; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
211; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
212; AVX1-NEXT:    vmovmskps %xmm0, %eax
213; AVX1-NEXT:    testb %al, %al
214; AVX1-NEXT:    setnp %al
215; AVX1-NEXT:    vzeroupper
216; AVX1-NEXT:    retq
217;
218; AVX2-LABEL: trunc_v4i64_v4i1:
219; AVX2:       # %bb.0:
220; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
221; AVX2-NEXT:    vmovmskpd %ymm0, %eax
222; AVX2-NEXT:    testb %al, %al
223; AVX2-NEXT:    setnp %al
224; AVX2-NEXT:    vzeroupper
225; AVX2-NEXT:    retq
226;
227; AVX512F-LABEL: trunc_v4i64_v4i1:
228; AVX512F:       # %bb.0:
229; AVX512F-NEXT:    vpsllq $63, %ymm0, %ymm0
230; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
231; AVX512F-NEXT:    kmovw %k0, %eax
232; AVX512F-NEXT:    testb $15, %al
233; AVX512F-NEXT:    setnp %al
234; AVX512F-NEXT:    vzeroupper
235; AVX512F-NEXT:    retq
236;
237; AVX512BW-LABEL: trunc_v4i64_v4i1:
238; AVX512BW:       # %bb.0:
239; AVX512BW-NEXT:    vpsllq $63, %ymm0, %ymm0
240; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
241; AVX512BW-NEXT:    kmovd %k0, %eax
242; AVX512BW-NEXT:    testb $15, %al
243; AVX512BW-NEXT:    setnp %al
244; AVX512BW-NEXT:    vzeroupper
245; AVX512BW-NEXT:    retq
246;
247; AVX512VL-LABEL: trunc_v4i64_v4i1:
248; AVX512VL:       # %bb.0:
249; AVX512VL-NEXT:    vpsllq $63, %ymm0, %ymm0
250; AVX512VL-NEXT:    vptestmq %ymm0, %ymm0, %k0
251; AVX512VL-NEXT:    kmovd %k0, %eax
252; AVX512VL-NEXT:    testb %al, %al
253; AVX512VL-NEXT:    setnp %al
254; AVX512VL-NEXT:    vzeroupper
255; AVX512VL-NEXT:    retq
256  %a = trunc <4 x i64> %0 to <4 x i1>
257  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
258  ret i1 %b
259}
260
; Truncates each lane of the <8 x i32> argument to i1 and XOR-reduces the
; eight resulting bits to a single i1 via llvm.vector.reduce.xor.v8i1.
; The SSE2 and SSE4.1 sequences differ only in how they narrow dwords to
; words (shift pair + packssdw versus pblendw with zero + packusdw); both
; then pack to bytes and use pmovmskb. The AVX sequences read the eight lane
; sign bits directly with vmovmskps on ymm, and the AVX512 sequences use
; vptestmd into a k-register. The eight lane bits fit in one byte, so every
; sequence ends with testb %al, %al and setnp (parity flag).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
261define i1 @trunc_v8i32_v8i1(<8 x i32>) nounwind {
262; SSE2-LABEL: trunc_v8i32_v8i1:
263; SSE2:       # %bb.0:
264; SSE2-NEXT:    pslld $16, %xmm1
265; SSE2-NEXT:    psrad $16, %xmm1
266; SSE2-NEXT:    pslld $16, %xmm0
267; SSE2-NEXT:    psrad $16, %xmm0
268; SSE2-NEXT:    packssdw %xmm1, %xmm0
269; SSE2-NEXT:    psllw $15, %xmm0
270; SSE2-NEXT:    packsswb %xmm0, %xmm0
271; SSE2-NEXT:    pmovmskb %xmm0, %eax
272; SSE2-NEXT:    testb %al, %al
273; SSE2-NEXT:    setnp %al
274; SSE2-NEXT:    ret{{[l|q]}}
275;
276; SSE41-LABEL: trunc_v8i32_v8i1:
277; SSE41:       # %bb.0:
278; SSE41-NEXT:    pxor %xmm2, %xmm2
279; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
280; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
281; SSE41-NEXT:    packusdw %xmm1, %xmm0
282; SSE41-NEXT:    psllw $15, %xmm0
283; SSE41-NEXT:    packsswb %xmm0, %xmm0
284; SSE41-NEXT:    pmovmskb %xmm0, %eax
285; SSE41-NEXT:    testb %al, %al
286; SSE41-NEXT:    setnp %al
287; SSE41-NEXT:    retq
288;
289; AVX1-LABEL: trunc_v8i32_v8i1:
290; AVX1:       # %bb.0:
291; AVX1-NEXT:    vpslld $31, %xmm0, %xmm1
292; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
293; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
294; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
295; AVX1-NEXT:    vmovmskps %ymm0, %eax
296; AVX1-NEXT:    testb %al, %al
297; AVX1-NEXT:    setnp %al
298; AVX1-NEXT:    vzeroupper
299; AVX1-NEXT:    retq
300;
301; AVX2-LABEL: trunc_v8i32_v8i1:
302; AVX2:       # %bb.0:
303; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
304; AVX2-NEXT:    vmovmskps %ymm0, %eax
305; AVX2-NEXT:    testb %al, %al
306; AVX2-NEXT:    setnp %al
307; AVX2-NEXT:    vzeroupper
308; AVX2-NEXT:    retq
309;
310; AVX512F-LABEL: trunc_v8i32_v8i1:
311; AVX512F:       # %bb.0:
312; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
313; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
314; AVX512F-NEXT:    kmovw %k0, %eax
315; AVX512F-NEXT:    testb %al, %al
316; AVX512F-NEXT:    setnp %al
317; AVX512F-NEXT:    vzeroupper
318; AVX512F-NEXT:    retq
319;
320; AVX512BW-LABEL: trunc_v8i32_v8i1:
321; AVX512BW:       # %bb.0:
322; AVX512BW-NEXT:    vpslld $31, %ymm0, %ymm0
323; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
324; AVX512BW-NEXT:    kmovd %k0, %eax
325; AVX512BW-NEXT:    testb %al, %al
326; AVX512BW-NEXT:    setnp %al
327; AVX512BW-NEXT:    vzeroupper
328; AVX512BW-NEXT:    retq
329;
330; AVX512VL-LABEL: trunc_v8i32_v8i1:
331; AVX512VL:       # %bb.0:
332; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
333; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
334; AVX512VL-NEXT:    kmovd %k0, %eax
335; AVX512VL-NEXT:    testb %al, %al
336; AVX512VL-NEXT:    setnp %al
337; AVX512VL-NEXT:    vzeroupper
338; AVX512VL-NEXT:    retq
339  %a = trunc <8 x i32> %0 to <8 x i1>
340  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
341  ret i1 %b
342}
343
; Truncates each lane of the <16 x i16> argument to i1 and XOR-reduces the
; sixteen resulting bits to a single i1 via llvm.vector.reduce.xor.v16i1.
; The SSE and AVX sequences mask every word to its low byte, pack the words
; to sixteen bytes (packuswb), and collect a 16-bit mask with pmovmskb; the
; AVX512 sequences produce a 16-bit k-mask instead (vptestmd after
; vpmovsxwd, or vpmovw2m). In every case the two mask bytes are folded
; together (xorb %ah, %al, or shrl $8 followed by xorb) before setnp takes
; the result from the parity flag.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
344define i1 @trunc_v16i16_v16i1(<16 x i16>) nounwind {
345; SSE2-LABEL: trunc_v16i16_v16i1:
346; SSE2:       # %bb.0:
347; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
348; SSE2-NEXT:    pand %xmm2, %xmm1
349; SSE2-NEXT:    pand %xmm2, %xmm0
350; SSE2-NEXT:    packuswb %xmm1, %xmm0
351; SSE2-NEXT:    psllw $7, %xmm0
352; SSE2-NEXT:    pmovmskb %xmm0, %eax
353; SSE2-NEXT:    xorb %ah, %al
354; SSE2-NEXT:    setnp %al
355; SSE2-NEXT:    ret{{[l|q]}}
356;
357; SSE41-LABEL: trunc_v16i16_v16i1:
358; SSE41:       # %bb.0:
359; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
360; SSE41-NEXT:    pand %xmm2, %xmm1
361; SSE41-NEXT:    pand %xmm2, %xmm0
362; SSE41-NEXT:    packuswb %xmm1, %xmm0
363; SSE41-NEXT:    psllw $7, %xmm0
364; SSE41-NEXT:    pmovmskb %xmm0, %eax
365; SSE41-NEXT:    xorb %ah, %al
366; SSE41-NEXT:    setnp %al
367; SSE41-NEXT:    retq
368;
369; AVX1-LABEL: trunc_v16i16_v16i1:
370; AVX1:       # %bb.0:
371; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
372; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
373; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
374; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
375; AVX1-NEXT:    vpmovmskb %xmm0, %eax
376; AVX1-NEXT:    xorb %ah, %al
377; AVX1-NEXT:    setnp %al
378; AVX1-NEXT:    vzeroupper
379; AVX1-NEXT:    retq
380;
381; AVX2-LABEL: trunc_v16i16_v16i1:
382; AVX2:       # %bb.0:
383; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
384; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
385; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
386; AVX2-NEXT:    vpsllw $7, %xmm0, %xmm0
387; AVX2-NEXT:    vpmovmskb %xmm0, %eax
388; AVX2-NEXT:    xorb %ah, %al
389; AVX2-NEXT:    setnp %al
390; AVX2-NEXT:    vzeroupper
391; AVX2-NEXT:    retq
392;
393; AVX512F-LABEL: trunc_v16i16_v16i1:
394; AVX512F:       # %bb.0:
395; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
396; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
397; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
398; AVX512F-NEXT:    kmovw %k0, %eax
399; AVX512F-NEXT:    movl %eax, %ecx
400; AVX512F-NEXT:    shrl $8, %ecx
401; AVX512F-NEXT:    xorb %al, %cl
402; AVX512F-NEXT:    setnp %al
403; AVX512F-NEXT:    vzeroupper
404; AVX512F-NEXT:    retq
405;
406; AVX512BW-LABEL: trunc_v16i16_v16i1:
407; AVX512BW:       # %bb.0:
408; AVX512BW-NEXT:    vpsllw $15, %ymm0, %ymm0
409; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
410; AVX512BW-NEXT:    kmovd %k0, %eax
411; AVX512BW-NEXT:    movl %eax, %ecx
412; AVX512BW-NEXT:    shrl $8, %ecx
413; AVX512BW-NEXT:    xorb %al, %cl
414; AVX512BW-NEXT:    setnp %al
415; AVX512BW-NEXT:    vzeroupper
416; AVX512BW-NEXT:    retq
417;
418; AVX512VL-LABEL: trunc_v16i16_v16i1:
419; AVX512VL:       # %bb.0:
420; AVX512VL-NEXT:    vpsllw $15, %ymm0, %ymm0
421; AVX512VL-NEXT:    vpmovw2m %ymm0, %k0
422; AVX512VL-NEXT:    kmovd %k0, %eax
423; AVX512VL-NEXT:    movl %eax, %ecx
424; AVX512VL-NEXT:    shrl $8, %ecx
425; AVX512VL-NEXT:    xorb %al, %cl
426; AVX512VL-NEXT:    setnp %al
427; AVX512VL-NEXT:    vzeroupper
428; AVX512VL-NEXT:    retq
429  %a = trunc <16 x i16> %0 to <16 x i1>
430  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
431  ret i1 %b
432}
433
; Truncates each lane of the <32 x i8> argument to i1 and XOR-reduces the
; thirty-two resulting bits to a single i1 via llvm.vector.reduce.xor.v32i1.
; The SSE and AVX1 sequences XOR the two 128-bit halves together first
; (pxor / vpxor) and then reduce a 16-bit pmovmskb mask. The AVX2, AVX512BW
; and AVX512VL sequences take a 32-bit mask from a ymm-wide vpmovmskb and
; fold it down with shrl $16 / xorl / xorb %ch, %cl before setnp.
; The AVX512F sequence is the odd one out: after XORing the halves it reduces
; inside mask registers with a kshiftrw/kxorw ladder (8, 4, 2, 1) and returns
; the value copied from k0 with kmovw, with no setnp.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
434define i1 @trunc_v32i8_v32i1(<32 x i8>) nounwind {
435; SSE-LABEL: trunc_v32i8_v32i1:
436; SSE:       # %bb.0:
437; SSE-NEXT:    pxor %xmm1, %xmm0
438; SSE-NEXT:    psllw $7, %xmm0
439; SSE-NEXT:    pmovmskb %xmm0, %eax
440; SSE-NEXT:    xorb %ah, %al
441; SSE-NEXT:    setnp %al
442; SSE-NEXT:    ret{{[l|q]}}
443;
444; AVX1-LABEL: trunc_v32i8_v32i1:
445; AVX1:       # %bb.0:
446; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
447; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
448; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
449; AVX1-NEXT:    vpmovmskb %xmm0, %eax
450; AVX1-NEXT:    xorb %ah, %al
451; AVX1-NEXT:    setnp %al
452; AVX1-NEXT:    vzeroupper
453; AVX1-NEXT:    retq
454;
455; AVX2-LABEL: trunc_v32i8_v32i1:
456; AVX2:       # %bb.0:
457; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
458; AVX2-NEXT:    vpmovmskb %ymm0, %eax
459; AVX2-NEXT:    movl %eax, %ecx
460; AVX2-NEXT:    shrl $16, %ecx
461; AVX2-NEXT:    xorl %eax, %ecx
462; AVX2-NEXT:    xorb %ch, %cl
463; AVX2-NEXT:    setnp %al
464; AVX2-NEXT:    vzeroupper
465; AVX2-NEXT:    retq
466;
467; AVX512F-LABEL: trunc_v32i8_v32i1:
468; AVX512F:       # %bb.0:
469; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
470; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
471; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
472; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
473; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
474; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
475; AVX512F-NEXT:    kxorw %k1, %k0, %k0
476; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
477; AVX512F-NEXT:    kxorw %k1, %k0, %k0
478; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
479; AVX512F-NEXT:    kxorw %k1, %k0, %k0
480; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
481; AVX512F-NEXT:    kxorw %k1, %k0, %k0
482; AVX512F-NEXT:    kmovw %k0, %eax
483; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
484; AVX512F-NEXT:    vzeroupper
485; AVX512F-NEXT:    retq
486;
487; AVX512BW-LABEL: trunc_v32i8_v32i1:
488; AVX512BW:       # %bb.0:
489; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
490; AVX512BW-NEXT:    vpmovmskb %ymm0, %eax
491; AVX512BW-NEXT:    movl %eax, %ecx
492; AVX512BW-NEXT:    shrl $16, %ecx
493; AVX512BW-NEXT:    xorl %eax, %ecx
494; AVX512BW-NEXT:    xorb %ch, %cl
495; AVX512BW-NEXT:    setnp %al
496; AVX512BW-NEXT:    vzeroupper
497; AVX512BW-NEXT:    retq
498;
499; AVX512VL-LABEL: trunc_v32i8_v32i1:
500; AVX512VL:       # %bb.0:
501; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0
502; AVX512VL-NEXT:    vpmovmskb %ymm0, %eax
503; AVX512VL-NEXT:    movl %eax, %ecx
504; AVX512VL-NEXT:    shrl $16, %ecx
505; AVX512VL-NEXT:    xorl %eax, %ecx
506; AVX512VL-NEXT:    xorb %ch, %cl
507; AVX512VL-NEXT:    setnp %al
508; AVX512VL-NEXT:    vzeroupper
509; AVX512VL-NEXT:    retq
510  %a = trunc <32 x i8> %0 to <32 x i1>
511  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
512  ret i1 %b
513}
514
; Truncates each lane of the <8 x i64> argument to i1 and XOR-reduces the
; eight resulting bits to a single i1 via llvm.vector.reduce.xor.v8i1.
; The i686 and x86-64 SSE2 configurations have separate expectations here:
; the i686 sequence sets up an ebp frame with a 16-byte-aligned stack and
; takes one shufps operand from memory, while the x86-64 sequence uses xmm0
; through xmm3 only. The SSE and AVX sequences narrow the qwords step by step
; before extracting a byte-wide mask; the AVX512 sequences need only
; vpsllq + vptestmq on the zmm register. The eight lane bits fit in one
; byte, so every sequence ends with testb %al, %al and setnp (parity flag).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
515define i1 @trunc_v8i64_v8i1(<8 x i64>) nounwind {
516; X86-SSE2-LABEL: trunc_v8i64_v8i1:
517; X86-SSE2:       # %bb.0:
518; X86-SSE2-NEXT:    pushl %ebp
519; X86-SSE2-NEXT:    movl %esp, %ebp
520; X86-SSE2-NEXT:    andl $-16, %esp
521; X86-SSE2-NEXT:    subl $16, %esp
522; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
523; X86-SSE2-NEXT:    pslld $16, %xmm0
524; X86-SSE2-NEXT:    psrad $16, %xmm0
525; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],mem[0,2]
526; X86-SSE2-NEXT:    pslld $16, %xmm2
527; X86-SSE2-NEXT:    psrad $16, %xmm2
528; X86-SSE2-NEXT:    packssdw %xmm2, %xmm0
529; X86-SSE2-NEXT:    psllw $15, %xmm0
530; X86-SSE2-NEXT:    packsswb %xmm0, %xmm0
531; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
532; X86-SSE2-NEXT:    testb %al, %al
533; X86-SSE2-NEXT:    setnp %al
534; X86-SSE2-NEXT:    movl %ebp, %esp
535; X86-SSE2-NEXT:    popl %ebp
536; X86-SSE2-NEXT:    retl
537;
538; X64-SSE2-LABEL: trunc_v8i64_v8i1:
539; X64-SSE2:       # %bb.0:
540; X64-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
541; X64-SSE2-NEXT:    pslld $16, %xmm2
542; X64-SSE2-NEXT:    psrad $16, %xmm2
543; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
544; X64-SSE2-NEXT:    pslld $16, %xmm0
545; X64-SSE2-NEXT:    psrad $16, %xmm0
546; X64-SSE2-NEXT:    packssdw %xmm2, %xmm0
547; X64-SSE2-NEXT:    psllw $15, %xmm0
548; X64-SSE2-NEXT:    packsswb %xmm0, %xmm0
549; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
550; X64-SSE2-NEXT:    testb %al, %al
551; X64-SSE2-NEXT:    setnp %al
552; X64-SSE2-NEXT:    retq
553;
554; SSE41-LABEL: trunc_v8i64_v8i1:
555; SSE41:       # %bb.0:
556; SSE41-NEXT:    pxor %xmm4, %xmm4
557; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
558; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
559; SSE41-NEXT:    packusdw %xmm3, %xmm2
560; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
561; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
562; SSE41-NEXT:    packusdw %xmm1, %xmm0
563; SSE41-NEXT:    packusdw %xmm2, %xmm0
564; SSE41-NEXT:    psllw $15, %xmm0
565; SSE41-NEXT:    packsswb %xmm0, %xmm0
566; SSE41-NEXT:    pmovmskb %xmm0, %eax
567; SSE41-NEXT:    testb %al, %al
568; SSE41-NEXT:    setnp %al
569; SSE41-NEXT:    retq
570;
571; AVX1-LABEL: trunc_v8i64_v8i1:
572; AVX1:       # %bb.0:
573; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
574; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
575; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
576; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
577; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
578; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
579; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
580; AVX1-NEXT:    vmovmskps %ymm0, %eax
581; AVX1-NEXT:    testb %al, %al
582; AVX1-NEXT:    setnp %al
583; AVX1-NEXT:    vzeroupper
584; AVX1-NEXT:    retq
585;
586; AVX2-LABEL: trunc_v8i64_v8i1:
587; AVX2:       # %bb.0:
588; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
589; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
590; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
591; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
592; AVX2-NEXT:    vmovmskps %ymm0, %eax
593; AVX2-NEXT:    testb %al, %al
594; AVX2-NEXT:    setnp %al
595; AVX2-NEXT:    vzeroupper
596; AVX2-NEXT:    retq
597;
598; AVX512F-LABEL: trunc_v8i64_v8i1:
599; AVX512F:       # %bb.0:
600; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
601; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
602; AVX512F-NEXT:    kmovw %k0, %eax
603; AVX512F-NEXT:    testb %al, %al
604; AVX512F-NEXT:    setnp %al
605; AVX512F-NEXT:    vzeroupper
606; AVX512F-NEXT:    retq
607;
608; AVX512BW-LABEL: trunc_v8i64_v8i1:
609; AVX512BW:       # %bb.0:
610; AVX512BW-NEXT:    vpsllq $63, %zmm0, %zmm0
611; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
612; AVX512BW-NEXT:    kmovd %k0, %eax
613; AVX512BW-NEXT:    testb %al, %al
614; AVX512BW-NEXT:    setnp %al
615; AVX512BW-NEXT:    vzeroupper
616; AVX512BW-NEXT:    retq
617;
618; AVX512VL-LABEL: trunc_v8i64_v8i1:
619; AVX512VL:       # %bb.0:
620; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
621; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
622; AVX512VL-NEXT:    kmovd %k0, %eax
623; AVX512VL-NEXT:    testb %al, %al
624; AVX512VL-NEXT:    setnp %al
625; AVX512VL-NEXT:    vzeroupper
626; AVX512VL-NEXT:    retq
627  %a = trunc <8 x i64> %0 to <8 x i1>
628  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
629  ret i1 %b
630}
631
; Truncates each lane of the <16 x i32> argument to i1 and XOR-reduces the
; sixteen resulting bits to a single i1 via llvm.vector.reduce.xor.v16i1.
; The SSE and AVX sequences mask every dword to its low byte, pack the
; sixteen lanes down to bytes, and collect a 16-bit mask with pmovmskb; the
; i686 sequence additionally sets up an ebp frame and reads one operand from
; 8(%ebp). The AVX512 sequences obtain the 16-bit mask directly with
; vpslld + vptestmd on the zmm register. In every case the two mask bytes are
; folded together (xorb %ah, %al, or shrl $8 followed by xorb) before setnp
; takes the result from the parity flag.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
632define i1 @trunc_v16i32_v16i1(<16 x i32>) nounwind {
633; X86-SSE2-LABEL: trunc_v16i32_v16i1:
634; X86-SSE2:       # %bb.0:
635; X86-SSE2-NEXT:    pushl %ebp
636; X86-SSE2-NEXT:    movl %esp, %ebp
637; X86-SSE2-NEXT:    andl $-16, %esp
638; X86-SSE2-NEXT:    subl $16, %esp
639; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
640; X86-SSE2-NEXT:    pand %xmm3, %xmm1
641; X86-SSE2-NEXT:    pand %xmm3, %xmm0
642; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
643; X86-SSE2-NEXT:    pand %xmm3, %xmm2
644; X86-SSE2-NEXT:    pand 8(%ebp), %xmm3
645; X86-SSE2-NEXT:    packuswb %xmm3, %xmm2
646; X86-SSE2-NEXT:    packuswb %xmm2, %xmm0
647; X86-SSE2-NEXT:    psllw $7, %xmm0
648; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
649; X86-SSE2-NEXT:    xorb %ah, %al
650; X86-SSE2-NEXT:    setnp %al
651; X86-SSE2-NEXT:    movl %ebp, %esp
652; X86-SSE2-NEXT:    popl %ebp
653; X86-SSE2-NEXT:    retl
654;
655; X64-SSE2-LABEL: trunc_v16i32_v16i1:
656; X64-SSE2:       # %bb.0:
657; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
658; X64-SSE2-NEXT:    pand %xmm4, %xmm3
659; X64-SSE2-NEXT:    pand %xmm4, %xmm2
660; X64-SSE2-NEXT:    packuswb %xmm3, %xmm2
661; X64-SSE2-NEXT:    pand %xmm4, %xmm1
662; X64-SSE2-NEXT:    pand %xmm4, %xmm0
663; X64-SSE2-NEXT:    packuswb %xmm1, %xmm0
664; X64-SSE2-NEXT:    packuswb %xmm2, %xmm0
665; X64-SSE2-NEXT:    psllw $7, %xmm0
666; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
667; X64-SSE2-NEXT:    xorb %ah, %al
668; X64-SSE2-NEXT:    setnp %al
669; X64-SSE2-NEXT:    retq
670;
671; SSE41-LABEL: trunc_v16i32_v16i1:
672; SSE41:       # %bb.0:
673; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
674; SSE41-NEXT:    pand %xmm4, %xmm3
675; SSE41-NEXT:    pand %xmm4, %xmm2
676; SSE41-NEXT:    packusdw %xmm3, %xmm2
677; SSE41-NEXT:    pand %xmm4, %xmm1
678; SSE41-NEXT:    pand %xmm4, %xmm0
679; SSE41-NEXT:    packusdw %xmm1, %xmm0
680; SSE41-NEXT:    packuswb %xmm2, %xmm0
681; SSE41-NEXT:    psllw $7, %xmm0
682; SSE41-NEXT:    pmovmskb %xmm0, %eax
683; SSE41-NEXT:    xorb %ah, %al
684; SSE41-NEXT:    setnp %al
685; SSE41-NEXT:    retq
686;
687; AVX1-LABEL: trunc_v16i32_v16i1:
688; AVX1:       # %bb.0:
689; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
690; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
691; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
692; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
693; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
694; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
695; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
696; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
697; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
698; AVX1-NEXT:    vpmovmskb %xmm0, %eax
699; AVX1-NEXT:    xorb %ah, %al
700; AVX1-NEXT:    setnp %al
701; AVX1-NEXT:    vzeroupper
702; AVX1-NEXT:    retq
703;
704; AVX2-LABEL: trunc_v16i32_v16i1:
705; AVX2:       # %bb.0:
706; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
707; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
708; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
709; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
710; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
711; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
712; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
713; AVX2-NEXT:    vpsllw $7, %xmm0, %xmm0
714; AVX2-NEXT:    vpmovmskb %xmm0, %eax
715; AVX2-NEXT:    xorb %ah, %al
716; AVX2-NEXT:    setnp %al
717; AVX2-NEXT:    vzeroupper
718; AVX2-NEXT:    retq
719;
720; AVX512F-LABEL: trunc_v16i32_v16i1:
721; AVX512F:       # %bb.0:
722; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
723; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
724; AVX512F-NEXT:    kmovw %k0, %eax
725; AVX512F-NEXT:    movl %eax, %ecx
726; AVX512F-NEXT:    shrl $8, %ecx
727; AVX512F-NEXT:    xorb %al, %cl
728; AVX512F-NEXT:    setnp %al
729; AVX512F-NEXT:    vzeroupper
730; AVX512F-NEXT:    retq
731;
732; AVX512BW-LABEL: trunc_v16i32_v16i1:
733; AVX512BW:       # %bb.0:
734; AVX512BW-NEXT:    vpslld $31, %zmm0, %zmm0
735; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
736; AVX512BW-NEXT:    kmovd %k0, %eax
737; AVX512BW-NEXT:    movl %eax, %ecx
738; AVX512BW-NEXT:    shrl $8, %ecx
739; AVX512BW-NEXT:    xorb %al, %cl
740; AVX512BW-NEXT:    setnp %al
741; AVX512BW-NEXT:    vzeroupper
742; AVX512BW-NEXT:    retq
743;
744; AVX512VL-LABEL: trunc_v16i32_v16i1:
745; AVX512VL:       # %bb.0:
746; AVX512VL-NEXT:    vpslld $31, %zmm0, %zmm0
747; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k0
748; AVX512VL-NEXT:    kmovd %k0, %eax
749; AVX512VL-NEXT:    movl %eax, %ecx
750; AVX512VL-NEXT:    shrl $8, %ecx
751; AVX512VL-NEXT:    xorb %al, %cl
752; AVX512VL-NEXT:    setnp %al
753; AVX512VL-NEXT:    vzeroupper
754; AVX512VL-NEXT:    retq
755  %a = trunc <16 x i32> %0 to <16 x i1>
756  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
757  ret i1 %b
758}
759
; Truncates each lane of the <32 x i16> argument to i1 and XOR-reduces the
; thirty-two resulting bits to a single i1 via llvm.vector.reduce.xor.v32i1.
; The SSE sequences pack the words to two vectors of sixteen bytes, XOR them
; together (pxor), and reduce a 16-bit pmovmskb mask; the i686 sequence
; additionally sets up an ebp frame and reads one operand from 8(%ebp).
; The AVX1 sequence XORs the two ymm halves before narrowing. The AVX2,
; AVX512BW and AVX512VL sequences build a 32-bit mask and fold it with
; shrl $16 / xorl / xorb %ch, %cl before setnp. The AVX512F sequence instead
; XORs the two 256-bit halves and finishes inside mask registers with a
; kshiftrw/kxorw ladder (8, 4, 2, 1), returning the value copied from k0 with
; kmovw and no setnp.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
760define i1 @trunc_v32i16_v32i1(<32 x i16>) nounwind {
761; X86-SSE2-LABEL: trunc_v32i16_v32i1:
762; X86-SSE2:       # %bb.0:
763; X86-SSE2-NEXT:    pushl %ebp
764; X86-SSE2-NEXT:    movl %esp, %ebp
765; X86-SSE2-NEXT:    andl $-16, %esp
766; X86-SSE2-NEXT:    subl $16, %esp
767; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
768; X86-SSE2-NEXT:    pand %xmm3, %xmm1
769; X86-SSE2-NEXT:    pand %xmm3, %xmm0
770; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
771; X86-SSE2-NEXT:    pand %xmm3, %xmm2
772; X86-SSE2-NEXT:    pand 8(%ebp), %xmm3
773; X86-SSE2-NEXT:    packuswb %xmm3, %xmm2
774; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
775; X86-SSE2-NEXT:    psllw $7, %xmm2
776; X86-SSE2-NEXT:    pmovmskb %xmm2, %eax
777; X86-SSE2-NEXT:    xorb %ah, %al
778; X86-SSE2-NEXT:    setnp %al
779; X86-SSE2-NEXT:    movl %ebp, %esp
780; X86-SSE2-NEXT:    popl %ebp
781; X86-SSE2-NEXT:    retl
782;
783; X64-SSE2-LABEL: trunc_v32i16_v32i1:
784; X64-SSE2:       # %bb.0:
785; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
786; X64-SSE2-NEXT:    pand %xmm4, %xmm3
787; X64-SSE2-NEXT:    pand %xmm4, %xmm2
788; X64-SSE2-NEXT:    packuswb %xmm3, %xmm2
789; X64-SSE2-NEXT:    pand %xmm4, %xmm1
790; X64-SSE2-NEXT:    pand %xmm4, %xmm0
791; X64-SSE2-NEXT:    packuswb %xmm1, %xmm0
792; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
793; X64-SSE2-NEXT:    psllw $7, %xmm0
794; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
795; X64-SSE2-NEXT:    xorb %ah, %al
796; X64-SSE2-NEXT:    setnp %al
797; X64-SSE2-NEXT:    retq
798;
799; SSE41-LABEL: trunc_v32i16_v32i1:
800; SSE41:       # %bb.0:
801; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
802; SSE41-NEXT:    pand %xmm4, %xmm3
803; SSE41-NEXT:    pand %xmm4, %xmm2
804; SSE41-NEXT:    packuswb %xmm3, %xmm2
805; SSE41-NEXT:    pand %xmm4, %xmm1
806; SSE41-NEXT:    pand %xmm4, %xmm0
807; SSE41-NEXT:    packuswb %xmm1, %xmm0
808; SSE41-NEXT:    pxor %xmm2, %xmm0
809; SSE41-NEXT:    psllw $7, %xmm0
810; SSE41-NEXT:    pmovmskb %xmm0, %eax
811; SSE41-NEXT:    xorb %ah, %al
812; SSE41-NEXT:    setnp %al
813; SSE41-NEXT:    retq
814;
815; AVX1-LABEL: trunc_v32i16_v32i1:
816; AVX1:       # %bb.0:
817; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
818; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
819; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
820; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
821; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
822; AVX1-NEXT:    vpmovmskb %xmm0, %eax
823; AVX1-NEXT:    xorb %ah, %al
824; AVX1-NEXT:    setnp %al
825; AVX1-NEXT:    vzeroupper
826; AVX1-NEXT:    retq
827;
828; AVX2-LABEL: trunc_v32i16_v32i1:
829; AVX2:       # %bb.0:
830; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
831; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
832; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
833; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
834; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
835; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
836; AVX2-NEXT:    vpmovmskb %ymm0, %eax
837; AVX2-NEXT:    movl %eax, %ecx
838; AVX2-NEXT:    shrl $16, %ecx
839; AVX2-NEXT:    xorl %eax, %ecx
840; AVX2-NEXT:    xorb %ch, %cl
841; AVX2-NEXT:    setnp %al
842; AVX2-NEXT:    vzeroupper
843; AVX2-NEXT:    retq
844;
845; AVX512F-LABEL: trunc_v32i16_v32i1:
846; AVX512F:       # %bb.0:
847; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
848; AVX512F-NEXT:    vpxor %ymm1, %ymm0, %ymm0
849; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
850; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
851; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
852; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
853; AVX512F-NEXT:    kxorw %k1, %k0, %k0
854; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
855; AVX512F-NEXT:    kxorw %k1, %k0, %k0
856; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
857; AVX512F-NEXT:    kxorw %k1, %k0, %k0
858; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
859; AVX512F-NEXT:    kxorw %k1, %k0, %k0
860; AVX512F-NEXT:    kmovw %k0, %eax
861; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
862; AVX512F-NEXT:    vzeroupper
863; AVX512F-NEXT:    retq
864;
865; AVX512BW-LABEL: trunc_v32i16_v32i1:
866; AVX512BW:       # %bb.0:
867; AVX512BW-NEXT:    vpsllw $15, %zmm0, %zmm0
868; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
869; AVX512BW-NEXT:    kmovd %k0, %eax
870; AVX512BW-NEXT:    movl %eax, %ecx
871; AVX512BW-NEXT:    shrl $16, %ecx
872; AVX512BW-NEXT:    xorl %eax, %ecx
873; AVX512BW-NEXT:    xorb %ch, %cl
874; AVX512BW-NEXT:    setnp %al
875; AVX512BW-NEXT:    vzeroupper
876; AVX512BW-NEXT:    retq
877;
878; AVX512VL-LABEL: trunc_v32i16_v32i1:
879; AVX512VL:       # %bb.0:
880; AVX512VL-NEXT:    vpsllw $15, %zmm0, %zmm0
881; AVX512VL-NEXT:    vpmovw2m %zmm0, %k0
882; AVX512VL-NEXT:    kmovd %k0, %eax
883; AVX512VL-NEXT:    movl %eax, %ecx
884; AVX512VL-NEXT:    shrl $16, %ecx
885; AVX512VL-NEXT:    xorl %eax, %ecx
886; AVX512VL-NEXT:    xorb %ch, %cl
887; AVX512VL-NEXT:    setnp %al
888; AVX512VL-NEXT:    vzeroupper
889; AVX512VL-NEXT:    retq
890  %a = trunc <32 x i16> %0 to <32 x i1>
891  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
892  ret i1 %b
893}
894
; trunc_v64i8_v64i1: truncates each of the 64 i8 lanes to i1 and XOR-reduces
; the resulting mask to one i1, so the result is true when an odd number of
; lanes truncate to 1. Most configurations below fold the mask down to a single
; byte and read the x86 parity flag with setnp; the +avx512f-only configuration
; instead reduces the mask with a kshiftrw/kxorw sequence. The expectations are
; autogenerated (see the NOTE at the top of the file): regenerate them rather
; than editing them by hand.
895define i1 @trunc_v64i8_v64i1(<64 x i8>) nounwind {
896; X86-SSE2-LABEL: trunc_v64i8_v64i1:
897; X86-SSE2:       # %bb.0:
898; X86-SSE2-NEXT:    pushl %ebp
899; X86-SSE2-NEXT:    movl %esp, %ebp
900; X86-SSE2-NEXT:    andl $-16, %esp
901; X86-SSE2-NEXT:    subl $16, %esp
902; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
903; X86-SSE2-NEXT:    pxor 8(%ebp), %xmm1
904; X86-SSE2-NEXT:    pxor %xmm0, %xmm1
905; X86-SSE2-NEXT:    psllw $7, %xmm1
906; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
907; X86-SSE2-NEXT:    xorb %ah, %al
908; X86-SSE2-NEXT:    setnp %al
909; X86-SSE2-NEXT:    movl %ebp, %esp
910; X86-SSE2-NEXT:    popl %ebp
911; X86-SSE2-NEXT:    retl
912;
913; X64-SSE-LABEL: trunc_v64i8_v64i1:
914; X64-SSE:       # %bb.0:
915; X64-SSE-NEXT:    pxor %xmm3, %xmm1
916; X64-SSE-NEXT:    pxor %xmm2, %xmm0
917; X64-SSE-NEXT:    pxor %xmm1, %xmm0
918; X64-SSE-NEXT:    psllw $7, %xmm0
919; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
920; X64-SSE-NEXT:    xorb %ah, %al
921; X64-SSE-NEXT:    setnp %al
922; X64-SSE-NEXT:    retq
923;
924; AVX1-LABEL: trunc_v64i8_v64i1:
925; AVX1:       # %bb.0:
926; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
927; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
928; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0
929; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
930; AVX1-NEXT:    vpmovmskb %xmm0, %eax
931; AVX1-NEXT:    xorb %ah, %al
932; AVX1-NEXT:    setnp %al
933; AVX1-NEXT:    vzeroupper
934; AVX1-NEXT:    retq
935;
936; AVX2-LABEL: trunc_v64i8_v64i1:
937; AVX2:       # %bb.0:
938; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
939; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
940; AVX2-NEXT:    vpmovmskb %ymm0, %eax
941; AVX2-NEXT:    movl %eax, %ecx
942; AVX2-NEXT:    shrl $16, %ecx
943; AVX2-NEXT:    xorl %eax, %ecx
944; AVX2-NEXT:    xorb %ch, %cl
945; AVX2-NEXT:    setnp %al
946; AVX2-NEXT:    vzeroupper
947; AVX2-NEXT:    retq
948;
949; AVX512F-LABEL: trunc_v64i8_v64i1:
950; AVX512F:       # %bb.0:
951; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
952; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
953; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
954; AVX512F-NEXT:    vpxor %xmm2, %xmm3, %xmm2
955; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
956; AVX512F-NEXT:    vpxor %xmm2, %xmm0, %xmm0
957; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
958; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
959; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
960; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
961; AVX512F-NEXT:    kxorw %k1, %k0, %k0
962; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
963; AVX512F-NEXT:    kxorw %k1, %k0, %k0
964; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
965; AVX512F-NEXT:    kxorw %k1, %k0, %k0
966; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
967; AVX512F-NEXT:    kxorw %k1, %k0, %k0
968; AVX512F-NEXT:    kmovw %k0, %eax
969; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
970; AVX512F-NEXT:    vzeroupper
971; AVX512F-NEXT:    retq
972;
973; AVX512BW-LABEL: trunc_v64i8_v64i1:
974; AVX512BW:       # %bb.0:
975; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm0
976; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
977; AVX512BW-NEXT:    kmovq %k0, %rax
978; AVX512BW-NEXT:    movq %rax, %rcx
979; AVX512BW-NEXT:    shrq $32, %rcx
980; AVX512BW-NEXT:    xorl %eax, %ecx
981; AVX512BW-NEXT:    movl %ecx, %eax
982; AVX512BW-NEXT:    shrl $16, %eax
983; AVX512BW-NEXT:    xorl %ecx, %eax
984; AVX512BW-NEXT:    xorb %ah, %al
985; AVX512BW-NEXT:    setnp %al
986; AVX512BW-NEXT:    vzeroupper
987; AVX512BW-NEXT:    retq
988;
989; AVX512VL-LABEL: trunc_v64i8_v64i1:
990; AVX512VL:       # %bb.0:
991; AVX512VL-NEXT:    vpsllw $7, %zmm0, %zmm0
992; AVX512VL-NEXT:    vpmovb2m %zmm0, %k0
993; AVX512VL-NEXT:    kmovq %k0, %rax
994; AVX512VL-NEXT:    movq %rax, %rcx
995; AVX512VL-NEXT:    shrq $32, %rcx
996; AVX512VL-NEXT:    xorl %eax, %ecx
997; AVX512VL-NEXT:    movl %ecx, %eax
998; AVX512VL-NEXT:    shrl $16, %eax
999; AVX512VL-NEXT:    xorl %ecx, %eax
1000; AVX512VL-NEXT:    xorb %ah, %al
1001; AVX512VL-NEXT:    setnp %al
1002; AVX512VL-NEXT:    vzeroupper
1003; AVX512VL-NEXT:    retq
1004  %a = trunc <64 x i8> %0 to <64 x i1>
1005  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
1006  ret i1 %b
1007}
1008
1009;
1010; Comparison With Zero
1011;
1012
; icmp0_v2i64_v2i1: compares both i64 lanes against zero and XOR-reduces the
; <2 x i1> result, i.e. returns true when exactly one of the two lanes is zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask.
1013define i1 @icmp0_v2i64_v2i1(<2 x i64>) nounwind {
1014; SSE2-LABEL: icmp0_v2i64_v2i1:
1015; SSE2:       # %bb.0:
1016; SSE2-NEXT:    pxor %xmm1, %xmm1
1017; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1018; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
1019; SSE2-NEXT:    pand %xmm1, %xmm0
1020; SSE2-NEXT:    movmskpd %xmm0, %eax
1021; SSE2-NEXT:    testb %al, %al
1022; SSE2-NEXT:    setnp %al
1023; SSE2-NEXT:    ret{{[l|q]}}
1024;
1025; SSE41-LABEL: icmp0_v2i64_v2i1:
1026; SSE41:       # %bb.0:
1027; SSE41-NEXT:    pxor %xmm1, %xmm1
1028; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
1029; SSE41-NEXT:    movmskpd %xmm1, %eax
1030; SSE41-NEXT:    testb %al, %al
1031; SSE41-NEXT:    setnp %al
1032; SSE41-NEXT:    retq
1033;
1034; AVX-LABEL: icmp0_v2i64_v2i1:
1035; AVX:       # %bb.0:
1036; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1037; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
1038; AVX-NEXT:    vmovmskpd %xmm0, %eax
1039; AVX-NEXT:    testb %al, %al
1040; AVX-NEXT:    setnp %al
1041; AVX-NEXT:    retq
1042;
1043; AVX512F-LABEL: icmp0_v2i64_v2i1:
1044; AVX512F:       # %bb.0:
1045; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1046; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1047; AVX512F-NEXT:    kmovw %k0, %eax
1048; AVX512F-NEXT:    testb $3, %al
1049; AVX512F-NEXT:    setnp %al
1050; AVX512F-NEXT:    vzeroupper
1051; AVX512F-NEXT:    retq
1052;
1053; AVX512BW-LABEL: icmp0_v2i64_v2i1:
1054; AVX512BW:       # %bb.0:
1055; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1056; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1057; AVX512BW-NEXT:    kmovd %k0, %eax
1058; AVX512BW-NEXT:    testb $3, %al
1059; AVX512BW-NEXT:    setnp %al
1060; AVX512BW-NEXT:    vzeroupper
1061; AVX512BW-NEXT:    retq
1062;
1063; AVX512VL-LABEL: icmp0_v2i64_v2i1:
1064; AVX512VL:       # %bb.0:
1065; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
1066; AVX512VL-NEXT:    kmovd %k0, %eax
1067; AVX512VL-NEXT:    testb %al, %al
1068; AVX512VL-NEXT:    setnp %al
1069; AVX512VL-NEXT:    retq
1070  %a = icmp eq <2 x i64> %0, zeroinitializer
1071  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
1072  ret i1 %b
1073}
1074
; icmp0_v4i32_v4i1: compares the four i32 lanes against zero and XOR-reduces
; the <4 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask.
1075define i1 @icmp0_v4i32_v4i1(<4 x i32>) nounwind {
1076; SSE-LABEL: icmp0_v4i32_v4i1:
1077; SSE:       # %bb.0:
1078; SSE-NEXT:    pxor %xmm1, %xmm1
1079; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
1080; SSE-NEXT:    movmskps %xmm1, %eax
1081; SSE-NEXT:    testb %al, %al
1082; SSE-NEXT:    setnp %al
1083; SSE-NEXT:    ret{{[l|q]}}
1084;
1085; AVX-LABEL: icmp0_v4i32_v4i1:
1086; AVX:       # %bb.0:
1087; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1088; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1089; AVX-NEXT:    vmovmskps %xmm0, %eax
1090; AVX-NEXT:    testb %al, %al
1091; AVX-NEXT:    setnp %al
1092; AVX-NEXT:    retq
1093;
1094; AVX512F-LABEL: icmp0_v4i32_v4i1:
1095; AVX512F:       # %bb.0:
1096; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1097; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1098; AVX512F-NEXT:    kmovw %k0, %eax
1099; AVX512F-NEXT:    testb $15, %al
1100; AVX512F-NEXT:    setnp %al
1101; AVX512F-NEXT:    vzeroupper
1102; AVX512F-NEXT:    retq
1103;
1104; AVX512BW-LABEL: icmp0_v4i32_v4i1:
1105; AVX512BW:       # %bb.0:
1106; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1107; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1108; AVX512BW-NEXT:    kmovd %k0, %eax
1109; AVX512BW-NEXT:    testb $15, %al
1110; AVX512BW-NEXT:    setnp %al
1111; AVX512BW-NEXT:    vzeroupper
1112; AVX512BW-NEXT:    retq
1113;
1114; AVX512VL-LABEL: icmp0_v4i32_v4i1:
1115; AVX512VL:       # %bb.0:
1116; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
1117; AVX512VL-NEXT:    kmovd %k0, %eax
1118; AVX512VL-NEXT:    testb %al, %al
1119; AVX512VL-NEXT:    setnp %al
1120; AVX512VL-NEXT:    retq
1121  %a = icmp eq <4 x i32> %0, zeroinitializer
1122  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
1123  ret i1 %b
1124}
1125
; icmp0_v8i16_v8i1: compares the eight i16 lanes against zero and XOR-reduces
; the <8 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask's low byte.
1126define i1 @icmp0_v8i16_v8i1(<8 x i16>) nounwind {
1127; SSE-LABEL: icmp0_v8i16_v8i1:
1128; SSE:       # %bb.0:
1129; SSE-NEXT:    pxor %xmm1, %xmm1
1130; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
1131; SSE-NEXT:    packsswb %xmm1, %xmm1
1132; SSE-NEXT:    pmovmskb %xmm1, %eax
1133; SSE-NEXT:    testb %al, %al
1134; SSE-NEXT:    setnp %al
1135; SSE-NEXT:    ret{{[l|q]}}
1136;
1137; AVX-LABEL: icmp0_v8i16_v8i1:
1138; AVX:       # %bb.0:
1139; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1140; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
1141; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1142; AVX-NEXT:    vpmovmskb %xmm0, %eax
1143; AVX-NEXT:    testb %al, %al
1144; AVX-NEXT:    setnp %al
1145; AVX-NEXT:    retq
1146;
1147; AVX512F-LABEL: icmp0_v8i16_v8i1:
1148; AVX512F:       # %bb.0:
1149; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1150; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
1151; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
1152; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
1153; AVX512F-NEXT:    kmovw %k0, %eax
1154; AVX512F-NEXT:    testb %al, %al
1155; AVX512F-NEXT:    setnp %al
1156; AVX512F-NEXT:    vzeroupper
1157; AVX512F-NEXT:    retq
1158;
1159; AVX512BW-LABEL: icmp0_v8i16_v8i1:
1160; AVX512BW:       # %bb.0:
1161; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1162; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
1163; AVX512BW-NEXT:    kmovd %k0, %eax
1164; AVX512BW-NEXT:    testb %al, %al
1165; AVX512BW-NEXT:    setnp %al
1166; AVX512BW-NEXT:    vzeroupper
1167; AVX512BW-NEXT:    retq
1168;
1169; AVX512VL-LABEL: icmp0_v8i16_v8i1:
1170; AVX512VL:       # %bb.0:
1171; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
1172; AVX512VL-NEXT:    kmovd %k0, %eax
1173; AVX512VL-NEXT:    testb %al, %al
1174; AVX512VL-NEXT:    setnp %al
1175; AVX512VL-NEXT:    retq
1176  %a = icmp eq <8 x i16> %0, zeroinitializer
1177  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
1178  ret i1 %b
1179}
1180
; icmp0_v16i8_v16i1: compares the sixteen i8 lanes against zero and XOR-reduces
; the <16 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to XOR the two bytes of the 16-bit
; compare mask together and then produce the result with setnp (x86 parity
; flag).
1181define i1 @icmp0_v16i8_v16i1(<16 x i8>) nounwind {
1182; SSE-LABEL: icmp0_v16i8_v16i1:
1183; SSE:       # %bb.0:
1184; SSE-NEXT:    pxor %xmm1, %xmm1
1185; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
1186; SSE-NEXT:    pmovmskb %xmm1, %eax
1187; SSE-NEXT:    xorb %ah, %al
1188; SSE-NEXT:    setnp %al
1189; SSE-NEXT:    ret{{[l|q]}}
1190;
1191; AVX-LABEL: icmp0_v16i8_v16i1:
1192; AVX:       # %bb.0:
1193; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1194; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1195; AVX-NEXT:    vpmovmskb %xmm0, %eax
1196; AVX-NEXT:    xorb %ah, %al
1197; AVX-NEXT:    setnp %al
1198; AVX-NEXT:    retq
1199;
1200; AVX512F-LABEL: icmp0_v16i8_v16i1:
1201; AVX512F:       # %bb.0:
1202; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1203; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1204; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
1205; AVX512F-NEXT:    xorb %ah, %al
1206; AVX512F-NEXT:    setnp %al
1207; AVX512F-NEXT:    retq
1208;
1209; AVX512BW-LABEL: icmp0_v16i8_v16i1:
1210; AVX512BW:       # %bb.0:
1211; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1212; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
1213; AVX512BW-NEXT:    kmovd %k0, %eax
1214; AVX512BW-NEXT:    movl %eax, %ecx
1215; AVX512BW-NEXT:    shrl $8, %ecx
1216; AVX512BW-NEXT:    xorb %al, %cl
1217; AVX512BW-NEXT:    setnp %al
1218; AVX512BW-NEXT:    vzeroupper
1219; AVX512BW-NEXT:    retq
1220;
1221; AVX512VL-LABEL: icmp0_v16i8_v16i1:
1222; AVX512VL:       # %bb.0:
1223; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
1224; AVX512VL-NEXT:    kmovd %k0, %eax
1225; AVX512VL-NEXT:    movl %eax, %ecx
1226; AVX512VL-NEXT:    shrl $8, %ecx
1227; AVX512VL-NEXT:    xorb %al, %cl
1228; AVX512VL-NEXT:    setnp %al
1229; AVX512VL-NEXT:    retq
1230  %a = icmp eq <16 x i8> %0, zeroinitializer
1231  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
1232  ret i1 %b
1233}
1234
; icmp0_v4i64_v4i1: compares the four i64 lanes against zero and XOR-reduces
; the <4 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask.
1235define i1 @icmp0_v4i64_v4i1(<4 x i64>) nounwind {
1236; SSE2-LABEL: icmp0_v4i64_v4i1:
1237; SSE2:       # %bb.0:
1238; SSE2-NEXT:    pxor %xmm2, %xmm2
1239; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
1240; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
1241; SSE2-NEXT:    movdqa %xmm0, %xmm2
1242; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
1243; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
1244; SSE2-NEXT:    andps %xmm2, %xmm0
1245; SSE2-NEXT:    movmskps %xmm0, %eax
1246; SSE2-NEXT:    testb %al, %al
1247; SSE2-NEXT:    setnp %al
1248; SSE2-NEXT:    ret{{[l|q]}}
1249;
1250; SSE41-LABEL: icmp0_v4i64_v4i1:
1251; SSE41:       # %bb.0:
1252; SSE41-NEXT:    pxor %xmm2, %xmm2
1253; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
1254; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
1255; SSE41-NEXT:    packssdw %xmm1, %xmm0
1256; SSE41-NEXT:    movmskps %xmm0, %eax
1257; SSE41-NEXT:    testb %al, %al
1258; SSE41-NEXT:    setnp %al
1259; SSE41-NEXT:    retq
1260;
1261; AVX1-LABEL: icmp0_v4i64_v4i1:
1262; AVX1:       # %bb.0:
1263; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1264; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1265; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
1266; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
1267; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1268; AVX1-NEXT:    vmovmskpd %ymm0, %eax
1269; AVX1-NEXT:    testb %al, %al
1270; AVX1-NEXT:    setnp %al
1271; AVX1-NEXT:    vzeroupper
1272; AVX1-NEXT:    retq
1273;
1274; AVX2-LABEL: icmp0_v4i64_v4i1:
1275; AVX2:       # %bb.0:
1276; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1277; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
1278; AVX2-NEXT:    vmovmskpd %ymm0, %eax
1279; AVX2-NEXT:    testb %al, %al
1280; AVX2-NEXT:    setnp %al
1281; AVX2-NEXT:    vzeroupper
1282; AVX2-NEXT:    retq
1283;
1284; AVX512F-LABEL: icmp0_v4i64_v4i1:
1285; AVX512F:       # %bb.0:
1286; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1287; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1288; AVX512F-NEXT:    kmovw %k0, %eax
1289; AVX512F-NEXT:    testb $15, %al
1290; AVX512F-NEXT:    setnp %al
1291; AVX512F-NEXT:    vzeroupper
1292; AVX512F-NEXT:    retq
1293;
1294; AVX512BW-LABEL: icmp0_v4i64_v4i1:
1295; AVX512BW:       # %bb.0:
1296; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1297; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1298; AVX512BW-NEXT:    kmovd %k0, %eax
1299; AVX512BW-NEXT:    testb $15, %al
1300; AVX512BW-NEXT:    setnp %al
1301; AVX512BW-NEXT:    vzeroupper
1302; AVX512BW-NEXT:    retq
1303;
1304; AVX512VL-LABEL: icmp0_v4i64_v4i1:
1305; AVX512VL:       # %bb.0:
1306; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
1307; AVX512VL-NEXT:    kmovd %k0, %eax
1308; AVX512VL-NEXT:    testb %al, %al
1309; AVX512VL-NEXT:    setnp %al
1310; AVX512VL-NEXT:    vzeroupper
1311; AVX512VL-NEXT:    retq
1312  %a = icmp eq <4 x i64> %0, zeroinitializer
1313  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
1314  ret i1 %b
1315}
1316
; icmp0_v8i32_v8i1: compares the eight i32 lanes against zero and XOR-reduces
; the <8 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask's low byte.
1317define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
1318; SSE-LABEL: icmp0_v8i32_v8i1:
1319; SSE:       # %bb.0:
1320; SSE-NEXT:    pxor %xmm2, %xmm2
1321; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
1322; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
1323; SSE-NEXT:    packssdw %xmm1, %xmm0
1324; SSE-NEXT:    packsswb %xmm0, %xmm0
1325; SSE-NEXT:    pmovmskb %xmm0, %eax
1326; SSE-NEXT:    testb %al, %al
1327; SSE-NEXT:    setnp %al
1328; SSE-NEXT:    ret{{[l|q]}}
1329;
1330; AVX1-LABEL: icmp0_v8i32_v8i1:
1331; AVX1:       # %bb.0:
1332; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1333; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1334; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
1335; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1336; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1337; AVX1-NEXT:    vmovmskps %ymm0, %eax
1338; AVX1-NEXT:    testb %al, %al
1339; AVX1-NEXT:    setnp %al
1340; AVX1-NEXT:    vzeroupper
1341; AVX1-NEXT:    retq
1342;
1343; AVX2-LABEL: icmp0_v8i32_v8i1:
1344; AVX2:       # %bb.0:
1345; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1346; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
1347; AVX2-NEXT:    vmovmskps %ymm0, %eax
1348; AVX2-NEXT:    testb %al, %al
1349; AVX2-NEXT:    setnp %al
1350; AVX2-NEXT:    vzeroupper
1351; AVX2-NEXT:    retq
1352;
1353; AVX512F-LABEL: icmp0_v8i32_v8i1:
1354; AVX512F:       # %bb.0:
1355; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1356; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1357; AVX512F-NEXT:    kmovw %k0, %eax
1358; AVX512F-NEXT:    testb %al, %al
1359; AVX512F-NEXT:    setnp %al
1360; AVX512F-NEXT:    vzeroupper
1361; AVX512F-NEXT:    retq
1362;
1363; AVX512BW-LABEL: icmp0_v8i32_v8i1:
1364; AVX512BW:       # %bb.0:
1365; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1366; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1367; AVX512BW-NEXT:    kmovd %k0, %eax
1368; AVX512BW-NEXT:    testb %al, %al
1369; AVX512BW-NEXT:    setnp %al
1370; AVX512BW-NEXT:    vzeroupper
1371; AVX512BW-NEXT:    retq
1372;
1373; AVX512VL-LABEL: icmp0_v8i32_v8i1:
1374; AVX512VL:       # %bb.0:
1375; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
1376; AVX512VL-NEXT:    kmovd %k0, %eax
1377; AVX512VL-NEXT:    testb %al, %al
1378; AVX512VL-NEXT:    setnp %al
1379; AVX512VL-NEXT:    vzeroupper
1380; AVX512VL-NEXT:    retq
1381  %a = icmp eq <8 x i32> %0, zeroinitializer
1382  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
1383  ret i1 %b
1384}
1385
; icmp0_v16i16_v16i1: compares the sixteen i16 lanes against zero and
; XOR-reduces the <16 x i1> result, i.e. returns true when an odd number of
; lanes are zero. Every configuration below is expected to XOR the two bytes of
; the 16-bit compare mask together and then produce the result with setnp (x86
; parity flag).
1386define i1 @icmp0_v16i16_v16i1(<16 x i16>) nounwind {
1387; SSE-LABEL: icmp0_v16i16_v16i1:
1388; SSE:       # %bb.0:
1389; SSE-NEXT:    pxor %xmm2, %xmm2
1390; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
1391; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
1392; SSE-NEXT:    packsswb %xmm1, %xmm0
1393; SSE-NEXT:    pmovmskb %xmm0, %eax
1394; SSE-NEXT:    xorb %ah, %al
1395; SSE-NEXT:    setnp %al
1396; SSE-NEXT:    ret{{[l|q]}}
1397;
1398; AVX1-LABEL: icmp0_v16i16_v16i1:
1399; AVX1:       # %bb.0:
1400; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1401; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1402; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
1403; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
1404; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1405; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1406; AVX1-NEXT:    xorb %ah, %al
1407; AVX1-NEXT:    setnp %al
1408; AVX1-NEXT:    vzeroupper
1409; AVX1-NEXT:    retq
1410;
1411; AVX2-LABEL: icmp0_v16i16_v16i1:
1412; AVX2:       # %bb.0:
1413; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1414; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1415; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1416; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1417; AVX2-NEXT:    vpmovmskb %xmm0, %eax
1418; AVX2-NEXT:    xorb %ah, %al
1419; AVX2-NEXT:    setnp %al
1420; AVX2-NEXT:    vzeroupper
1421; AVX2-NEXT:    retq
1422;
1423; AVX512F-LABEL: icmp0_v16i16_v16i1:
1424; AVX512F:       # %bb.0:
1425; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1426; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1427; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1428; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1429; AVX512F-NEXT:    kmovw %k0, %eax
1430; AVX512F-NEXT:    movl %eax, %ecx
1431; AVX512F-NEXT:    shrl $8, %ecx
1432; AVX512F-NEXT:    xorb %al, %cl
1433; AVX512F-NEXT:    setnp %al
1434; AVX512F-NEXT:    vzeroupper
1435; AVX512F-NEXT:    retq
1436;
1437; AVX512BW-LABEL: icmp0_v16i16_v16i1:
1438; AVX512BW:       # %bb.0:
1439; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1440; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
1441; AVX512BW-NEXT:    kmovd %k0, %eax
1442; AVX512BW-NEXT:    movl %eax, %ecx
1443; AVX512BW-NEXT:    shrl $8, %ecx
1444; AVX512BW-NEXT:    xorb %al, %cl
1445; AVX512BW-NEXT:    setnp %al
1446; AVX512BW-NEXT:    vzeroupper
1447; AVX512BW-NEXT:    retq
1448;
1449; AVX512VL-LABEL: icmp0_v16i16_v16i1:
1450; AVX512VL:       # %bb.0:
1451; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
1452; AVX512VL-NEXT:    kmovd %k0, %eax
1453; AVX512VL-NEXT:    movl %eax, %ecx
1454; AVX512VL-NEXT:    shrl $8, %ecx
1455; AVX512VL-NEXT:    xorb %al, %cl
1456; AVX512VL-NEXT:    setnp %al
1457; AVX512VL-NEXT:    vzeroupper
1458; AVX512VL-NEXT:    retq
1459  %a = icmp eq <16 x i16> %0, zeroinitializer
1460  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
1461  ret i1 %b
1462}
1463
; icmp0_v32i8_v32i1: compares the thirty-two i8 lanes against zero and
; XOR-reduces the <32 x i1> result, i.e. returns true when an odd number of
; lanes are zero. Most configurations below fold the compare mask down to a
; single byte and produce the result with setnp (x86 parity flag); the
; +avx512f-only configuration instead reduces the mask with a kshiftrw/kxorw
; sequence.
1464define i1 @icmp0_v32i8_v32i1(<32 x i8>) nounwind {
1465; SSE-LABEL: icmp0_v32i8_v32i1:
1466; SSE:       # %bb.0:
1467; SSE-NEXT:    pxor %xmm2, %xmm2
1468; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
1469; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
1470; SSE-NEXT:    pxor %xmm1, %xmm0
1471; SSE-NEXT:    pmovmskb %xmm0, %eax
1472; SSE-NEXT:    xorb %ah, %al
1473; SSE-NEXT:    setnp %al
1474; SSE-NEXT:    ret{{[l|q]}}
1475;
1476; AVX1-LABEL: icmp0_v32i8_v32i1:
1477; AVX1:       # %bb.0:
1478; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1479; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1480; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
1481; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
1482; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1483; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1484; AVX1-NEXT:    xorb %ah, %al
1485; AVX1-NEXT:    setnp %al
1486; AVX1-NEXT:    vzeroupper
1487; AVX1-NEXT:    retq
1488;
1489; AVX2-LABEL: icmp0_v32i8_v32i1:
1490; AVX2:       # %bb.0:
1491; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1492; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
1493; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1494; AVX2-NEXT:    movl %eax, %ecx
1495; AVX2-NEXT:    shrl $16, %ecx
1496; AVX2-NEXT:    xorl %eax, %ecx
1497; AVX2-NEXT:    xorb %ch, %cl
1498; AVX2-NEXT:    setnp %al
1499; AVX2-NEXT:    vzeroupper
1500; AVX2-NEXT:    retq
1501;
1502; AVX512F-LABEL: icmp0_v32i8_v32i1:
1503; AVX512F:       # %bb.0:
1504; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1505; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
1506; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
1507; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1508; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1509; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1510; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
1511; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1512; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
1513; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1514; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
1515; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1516; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
1517; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1518; AVX512F-NEXT:    kmovw %k0, %eax
1519; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
1520; AVX512F-NEXT:    vzeroupper
1521; AVX512F-NEXT:    retq
1522;
1523; AVX512BW-LABEL: icmp0_v32i8_v32i1:
1524; AVX512BW:       # %bb.0:
1525; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1526; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
1527; AVX512BW-NEXT:    kmovd %k0, %eax
1528; AVX512BW-NEXT:    movl %eax, %ecx
1529; AVX512BW-NEXT:    shrl $16, %ecx
1530; AVX512BW-NEXT:    xorl %eax, %ecx
1531; AVX512BW-NEXT:    xorb %ch, %cl
1532; AVX512BW-NEXT:    setnp %al
1533; AVX512BW-NEXT:    vzeroupper
1534; AVX512BW-NEXT:    retq
1535;
1536; AVX512VL-LABEL: icmp0_v32i8_v32i1:
1537; AVX512VL:       # %bb.0:
1538; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
1539; AVX512VL-NEXT:    kmovd %k0, %eax
1540; AVX512VL-NEXT:    movl %eax, %ecx
1541; AVX512VL-NEXT:    shrl $16, %ecx
1542; AVX512VL-NEXT:    xorl %eax, %ecx
1543; AVX512VL-NEXT:    xorb %ch, %cl
1544; AVX512VL-NEXT:    setnp %al
1545; AVX512VL-NEXT:    vzeroupper
1546; AVX512VL-NEXT:    retq
1547  %a = icmp eq <32 x i8> %0, zeroinitializer
1548  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
1549  ret i1 %b
1550}
1551
; icmp0_v8i64_v8i1: compares the eight i64 lanes against zero and XOR-reduces
; the <8 x i1> result, i.e. returns true when an odd number of lanes are zero.
; Every configuration below is expected to produce the result with setnp, i.e.
; from the x86 parity flag of the compare mask's low byte. In the 32-bit sse2
; expectations one 128-bit part of the argument is compared directly from the
; stack (8(%ebp)) inside an ebp-based, 16-byte-realigned frame.
1552define i1 @icmp0_v8i64_v8i1(<8 x i64>) nounwind {
1553; X86-SSE2-LABEL: icmp0_v8i64_v8i1:
1554; X86-SSE2:       # %bb.0:
1555; X86-SSE2-NEXT:    pushl %ebp
1556; X86-SSE2-NEXT:    movl %esp, %ebp
1557; X86-SSE2-NEXT:    andl $-16, %esp
1558; X86-SSE2-NEXT:    subl $16, %esp
1559; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
1560; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
1561; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,0,3,2]
1562; X86-SSE2-NEXT:    pand %xmm1, %xmm4
1563; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
1564; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1565; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1566; X86-SSE2-NEXT:    packssdw %xmm4, %xmm1
1567; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
1568; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
1569; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1570; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm3
1571; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
1572; X86-SSE2-NEXT:    pand %xmm3, %xmm2
1573; X86-SSE2-NEXT:    packssdw %xmm2, %xmm0
1574; X86-SSE2-NEXT:    packssdw %xmm0, %xmm1
1575; X86-SSE2-NEXT:    packsswb %xmm1, %xmm1
1576; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
1577; X86-SSE2-NEXT:    testb %al, %al
1578; X86-SSE2-NEXT:    setnp %al
1579; X86-SSE2-NEXT:    movl %ebp, %esp
1580; X86-SSE2-NEXT:    popl %ebp
1581; X86-SSE2-NEXT:    retl
1582;
1583; X64-SSE2-LABEL: icmp0_v8i64_v8i1:
1584; X64-SSE2:       # %bb.0:
1585; X64-SSE2-NEXT:    pxor %xmm4, %xmm4
1586; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
1587; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1588; X64-SSE2-NEXT:    pand %xmm3, %xmm5
1589; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm2
1590; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1591; X64-SSE2-NEXT:    pand %xmm2, %xmm3
1592; X64-SSE2-NEXT:    packssdw %xmm5, %xmm3
1593; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
1594; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1595; X64-SSE2-NEXT:    pand %xmm1, %xmm2
1596; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
1597; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1598; X64-SSE2-NEXT:    pand %xmm0, %xmm1
1599; X64-SSE2-NEXT:    packssdw %xmm2, %xmm1
1600; X64-SSE2-NEXT:    packssdw %xmm3, %xmm1
1601; X64-SSE2-NEXT:    packsswb %xmm1, %xmm1
1602; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
1603; X64-SSE2-NEXT:    testb %al, %al
1604; X64-SSE2-NEXT:    setnp %al
1605; X64-SSE2-NEXT:    retq
1606;
1607; SSE41-LABEL: icmp0_v8i64_v8i1:
1608; SSE41:       # %bb.0:
1609; SSE41-NEXT:    pxor %xmm4, %xmm4
1610; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
1611; SSE41-NEXT:    pcmpeqq %xmm4, %xmm2
1612; SSE41-NEXT:    packssdw %xmm3, %xmm2
1613; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
1614; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
1615; SSE41-NEXT:    packssdw %xmm1, %xmm0
1616; SSE41-NEXT:    packssdw %xmm2, %xmm0
1617; SSE41-NEXT:    packsswb %xmm0, %xmm0
1618; SSE41-NEXT:    pmovmskb %xmm0, %eax
1619; SSE41-NEXT:    testb %al, %al
1620; SSE41-NEXT:    setnp %al
1621; SSE41-NEXT:    retq
1622;
1623; AVX1-LABEL: icmp0_v8i64_v8i1:
1624; AVX1:       # %bb.0:
1625; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1626; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1627; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
1628; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
1629; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
1630; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1631; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
1632; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
1633; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1634; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1635; AVX1-NEXT:    vmovmskps %ymm0, %eax
1636; AVX1-NEXT:    testb %al, %al
1637; AVX1-NEXT:    setnp %al
1638; AVX1-NEXT:    vzeroupper
1639; AVX1-NEXT:    retq
1640;
1641; AVX2-LABEL: icmp0_v8i64_v8i1:
1642; AVX2:       # %bb.0:
1643; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1644; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
1645; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
1646; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1647; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1648; AVX2-NEXT:    vmovmskps %ymm0, %eax
1649; AVX2-NEXT:    testb %al, %al
1650; AVX2-NEXT:    setnp %al
1651; AVX2-NEXT:    vzeroupper
1652; AVX2-NEXT:    retq
1653;
1654; AVX512F-LABEL: icmp0_v8i64_v8i1:
1655; AVX512F:       # %bb.0:
1656; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1657; AVX512F-NEXT:    kmovw %k0, %eax
1658; AVX512F-NEXT:    testb %al, %al
1659; AVX512F-NEXT:    setnp %al
1660; AVX512F-NEXT:    vzeroupper
1661; AVX512F-NEXT:    retq
1662;
1663; AVX512BW-LABEL: icmp0_v8i64_v8i1:
1664; AVX512BW:       # %bb.0:
1665; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1666; AVX512BW-NEXT:    kmovd %k0, %eax
1667; AVX512BW-NEXT:    testb %al, %al
1668; AVX512BW-NEXT:    setnp %al
1669; AVX512BW-NEXT:    vzeroupper
1670; AVX512BW-NEXT:    retq
1671;
1672; AVX512VL-LABEL: icmp0_v8i64_v8i1:
1673; AVX512VL:       # %bb.0:
1674; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1675; AVX512VL-NEXT:    kmovd %k0, %eax
1676; AVX512VL-NEXT:    testb %al, %al
1677; AVX512VL-NEXT:    setnp %al
1678; AVX512VL-NEXT:    vzeroupper
1679; AVX512VL-NEXT:    retq
1680  %a = icmp eq <8 x i64> %0, zeroinitializer
1681  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
1682  ret i1 %b
1683}
1684
; icmp0_v16i32_v16i1: compares the sixteen i32 lanes against zero and
; XOR-reduces the <16 x i1> result, i.e. returns true when an odd number of
; lanes are zero. Every configuration below is expected to XOR the two bytes of
; the 16-bit compare mask together and then produce the result with setnp (x86
; parity flag). In the 32-bit sse2 expectations one 128-bit part of the
; argument is compared directly from the stack (8(%ebp)) inside an ebp-based,
; 16-byte-realigned frame.
1685define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
1686; X86-SSE2-LABEL: icmp0_v16i32_v16i1:
1687; X86-SSE2:       # %bb.0:
1688; X86-SSE2-NEXT:    pushl %ebp
1689; X86-SSE2-NEXT:    movl %esp, %ebp
1690; X86-SSE2-NEXT:    andl $-16, %esp
1691; X86-SSE2-NEXT:    subl $16, %esp
1692; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
1693; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
1694; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
1695; X86-SSE2-NEXT:    packssdw %xmm1, %xmm0
1696; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
1697; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm3
1698; X86-SSE2-NEXT:    packssdw %xmm3, %xmm2
1699; X86-SSE2-NEXT:    packsswb %xmm2, %xmm0
1700; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1701; X86-SSE2-NEXT:    xorb %ah, %al
1702; X86-SSE2-NEXT:    setnp %al
1703; X86-SSE2-NEXT:    movl %ebp, %esp
1704; X86-SSE2-NEXT:    popl %ebp
1705; X86-SSE2-NEXT:    retl
1706;
1707; X64-SSE-LABEL: icmp0_v16i32_v16i1:
1708; X64-SSE:       # %bb.0:
1709; X64-SSE-NEXT:    pxor %xmm4, %xmm4
1710; X64-SSE-NEXT:    pcmpeqd %xmm4, %xmm3
1711; X64-SSE-NEXT:    pcmpeqd %xmm4, %xmm2
1712; X64-SSE-NEXT:    packssdw %xmm3, %xmm2
1713; X64-SSE-NEXT:    pcmpeqd %xmm4, %xmm1
1714; X64-SSE-NEXT:    pcmpeqd %xmm4, %xmm0
1715; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
1716; X64-SSE-NEXT:    packsswb %xmm2, %xmm0
1717; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
1718; X64-SSE-NEXT:    xorb %ah, %al
1719; X64-SSE-NEXT:    setnp %al
1720; X64-SSE-NEXT:    retq
1721;
1722; AVX1-LABEL: icmp0_v16i32_v16i1:
1723; AVX1:       # %bb.0:
1724; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1725; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1726; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1727; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
1728; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
1729; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1730; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1731; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
1732; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1733; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1734; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1735; AVX1-NEXT:    xorb %ah, %al
1736; AVX1-NEXT:    setnp %al
1737; AVX1-NEXT:    vzeroupper
1738; AVX1-NEXT:    retq
1739;
1740; AVX2-LABEL: icmp0_v16i32_v16i1:
1741; AVX2:       # %bb.0:
1742; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1743; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
1744; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
1745; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1746; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1747; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1748; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1749; AVX2-NEXT:    vpmovmskb %xmm0, %eax
1750; AVX2-NEXT:    xorb %ah, %al
1751; AVX2-NEXT:    setnp %al
1752; AVX2-NEXT:    vzeroupper
1753; AVX2-NEXT:    retq
1754;
1755; AVX512F-LABEL: icmp0_v16i32_v16i1:
1756; AVX512F:       # %bb.0:
1757; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1758; AVX512F-NEXT:    kmovw %k0, %eax
1759; AVX512F-NEXT:    movl %eax, %ecx
1760; AVX512F-NEXT:    shrl $8, %ecx
1761; AVX512F-NEXT:    xorb %al, %cl
1762; AVX512F-NEXT:    setnp %al
1763; AVX512F-NEXT:    vzeroupper
1764; AVX512F-NEXT:    retq
1765;
1766; AVX512BW-LABEL: icmp0_v16i32_v16i1:
1767; AVX512BW:       # %bb.0:
1768; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1769; AVX512BW-NEXT:    kmovd %k0, %eax
1770; AVX512BW-NEXT:    movl %eax, %ecx
1771; AVX512BW-NEXT:    shrl $8, %ecx
1772; AVX512BW-NEXT:    xorb %al, %cl
1773; AVX512BW-NEXT:    setnp %al
1774; AVX512BW-NEXT:    vzeroupper
1775; AVX512BW-NEXT:    retq
1776;
1777; AVX512VL-LABEL: icmp0_v16i32_v16i1:
1778; AVX512VL:       # %bb.0:
1779; AVX512VL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1780; AVX512VL-NEXT:    kmovd %k0, %eax
1781; AVX512VL-NEXT:    movl %eax, %ecx
1782; AVX512VL-NEXT:    shrl $8, %ecx
1783; AVX512VL-NEXT:    xorb %al, %cl
1784; AVX512VL-NEXT:    setnp %al
1785; AVX512VL-NEXT:    vzeroupper
1786; AVX512VL-NEXT:    retq
1787  %a = icmp eq <16 x i32> %0, zeroinitializer
1788  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
1789  ret i1 %b
1790}
1791
; Test icmp0_v32i16_v32i1 - compares every lane of the <32 x i16> argument
; against zero and XOR-reduces the resulting <32 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes equal zero.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
1792define i1 @icmp0_v32i16_v32i1(<32 x i16>) nounwind {
1793; X86-SSE2-LABEL: icmp0_v32i16_v32i1:
1794; X86-SSE2:       # %bb.0:
1795; X86-SSE2-NEXT:    pushl %ebp
1796; X86-SSE2-NEXT:    movl %esp, %ebp
1797; X86-SSE2-NEXT:    andl $-16, %esp
1798; X86-SSE2-NEXT:    subl $16, %esp
1799; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
1800; X86-SSE2-NEXT:    pcmpeqw %xmm3, %xmm1
1801; X86-SSE2-NEXT:    pcmpeqw %xmm3, %xmm2
1802; X86-SSE2-NEXT:    pcmpeqw %xmm3, %xmm0
1803; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
1804; X86-SSE2-NEXT:    pcmpeqw 8(%ebp), %xmm3
1805; X86-SSE2-NEXT:    pxor %xmm1, %xmm3
1806; X86-SSE2-NEXT:    packsswb %xmm3, %xmm0
1807; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1808; X86-SSE2-NEXT:    xorb %ah, %al
1809; X86-SSE2-NEXT:    setnp %al
1810; X86-SSE2-NEXT:    movl %ebp, %esp
1811; X86-SSE2-NEXT:    popl %ebp
1812; X86-SSE2-NEXT:    retl
1813;
1814; X64-SSE-LABEL: icmp0_v32i16_v32i1:
1815; X64-SSE:       # %bb.0:
1816; X64-SSE-NEXT:    pxor %xmm4, %xmm4
1817; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm2
1818; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm0
1819; X64-SSE-NEXT:    pxor %xmm2, %xmm0
1820; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm3
1821; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm1
1822; X64-SSE-NEXT:    pxor %xmm3, %xmm1
1823; X64-SSE-NEXT:    packsswb %xmm1, %xmm0
1824; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
1825; X64-SSE-NEXT:    xorb %ah, %al
1826; X64-SSE-NEXT:    setnp %al
1827; X64-SSE-NEXT:    retq
1828;
1829; AVX1-LABEL: icmp0_v32i16_v32i1:
1830; AVX1:       # %bb.0:
1831; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1832; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm3
1833; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm4
1834; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm3
1835; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
1836; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
1837; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1838; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
1839; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1840; AVX1-NEXT:    vpacksswb %xmm0, %xmm3, %xmm0
1841; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1842; AVX1-NEXT:    xorb %ah, %al
1843; AVX1-NEXT:    setnp %al
1844; AVX1-NEXT:    vzeroupper
1845; AVX1-NEXT:    retq
1846;
1847; AVX2-LABEL: icmp0_v32i16_v32i1:
1848; AVX2:       # %bb.0:
1849; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1850; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
1851; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1852; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
1853; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1854; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1855; AVX2-NEXT:    movl %eax, %ecx
1856; AVX2-NEXT:    shrl $16, %ecx
1857; AVX2-NEXT:    xorl %eax, %ecx
1858; AVX2-NEXT:    xorb %ch, %cl
1859; AVX2-NEXT:    setnp %al
1860; AVX2-NEXT:    vzeroupper
1861; AVX2-NEXT:    retq
1862;
1863; AVX512F-LABEL: icmp0_v32i16_v32i1:
1864; AVX512F:       # %bb.0:
1865; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1866; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1867; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
1868; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1869; AVX512F-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1870; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1871; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1872; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
1873; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1874; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
1875; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1876; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
1877; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1878; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
1879; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1880; AVX512F-NEXT:    kmovw %k0, %eax
1881; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
1882; AVX512F-NEXT:    vzeroupper
1883; AVX512F-NEXT:    retq
1884;
1885; AVX512BW-LABEL: icmp0_v32i16_v32i1:
1886; AVX512BW:       # %bb.0:
1887; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
1888; AVX512BW-NEXT:    kmovd %k0, %eax
1889; AVX512BW-NEXT:    movl %eax, %ecx
1890; AVX512BW-NEXT:    shrl $16, %ecx
1891; AVX512BW-NEXT:    xorl %eax, %ecx
1892; AVX512BW-NEXT:    xorb %ch, %cl
1893; AVX512BW-NEXT:    setnp %al
1894; AVX512BW-NEXT:    vzeroupper
1895; AVX512BW-NEXT:    retq
1896;
1897; AVX512VL-LABEL: icmp0_v32i16_v32i1:
1898; AVX512VL:       # %bb.0:
1899; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
1900; AVX512VL-NEXT:    kmovd %k0, %eax
1901; AVX512VL-NEXT:    movl %eax, %ecx
1902; AVX512VL-NEXT:    shrl $16, %ecx
1903; AVX512VL-NEXT:    xorl %eax, %ecx
1904; AVX512VL-NEXT:    xorb %ch, %cl
1905; AVX512VL-NEXT:    setnp %al
1906; AVX512VL-NEXT:    vzeroupper
1907; AVX512VL-NEXT:    retq
1908  %a = icmp eq <32 x i16> %0, zeroinitializer
1909  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
1910  ret i1 %b
1911}
1912
; Test icmp0_v64i8_v64i1 - compares every lane of the <64 x i8> argument
; against zero and XOR-reduces the resulting <64 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes equal zero.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
1913define i1 @icmp0_v64i8_v64i1(<64 x i8>) nounwind {
1914; X86-SSE2-LABEL: icmp0_v64i8_v64i1:
1915; X86-SSE2:       # %bb.0:
1916; X86-SSE2-NEXT:    pushl %ebp
1917; X86-SSE2-NEXT:    movl %esp, %ebp
1918; X86-SSE2-NEXT:    andl $-16, %esp
1919; X86-SSE2-NEXT:    subl $16, %esp
1920; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
1921; X86-SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
1922; X86-SSE2-NEXT:    pcmpeqb %xmm3, %xmm2
1923; X86-SSE2-NEXT:    pcmpeqb %xmm3, %xmm0
1924; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
1925; X86-SSE2-NEXT:    pcmpeqb 8(%ebp), %xmm3
1926; X86-SSE2-NEXT:    pxor %xmm1, %xmm3
1927; X86-SSE2-NEXT:    pxor %xmm0, %xmm3
1928; X86-SSE2-NEXT:    pmovmskb %xmm3, %eax
1929; X86-SSE2-NEXT:    xorb %ah, %al
1930; X86-SSE2-NEXT:    setnp %al
1931; X86-SSE2-NEXT:    movl %ebp, %esp
1932; X86-SSE2-NEXT:    popl %ebp
1933; X86-SSE2-NEXT:    retl
1934;
1935; X64-SSE-LABEL: icmp0_v64i8_v64i1:
1936; X64-SSE:       # %bb.0:
1937; X64-SSE-NEXT:    pxor %xmm4, %xmm4
1938; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm2
1939; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm0
1940; X64-SSE-NEXT:    pxor %xmm2, %xmm0
1941; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm3
1942; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm1
1943; X64-SSE-NEXT:    pxor %xmm3, %xmm1
1944; X64-SSE-NEXT:    pxor %xmm0, %xmm1
1945; X64-SSE-NEXT:    pmovmskb %xmm1, %eax
1946; X64-SSE-NEXT:    xorb %ah, %al
1947; X64-SSE-NEXT:    setnp %al
1948; X64-SSE-NEXT:    retq
1949;
1950; AVX1-LABEL: icmp0_v64i8_v64i1:
1951; AVX1:       # %bb.0:
1952; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1953; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm3
1954; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm4
1955; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm3
1956; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
1957; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
1958; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1959; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
1960; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1961; AVX1-NEXT:    vpxor %xmm0, %xmm3, %xmm0
1962; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1963; AVX1-NEXT:    xorb %ah, %al
1964; AVX1-NEXT:    setnp %al
1965; AVX1-NEXT:    vzeroupper
1966; AVX1-NEXT:    retq
1967;
1968; AVX2-LABEL: icmp0_v64i8_v64i1:
1969; AVX2:       # %bb.0:
1970; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1971; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
1972; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
1973; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
1974; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1975; AVX2-NEXT:    movl %eax, %ecx
1976; AVX2-NEXT:    shrl $16, %ecx
1977; AVX2-NEXT:    xorl %eax, %ecx
1978; AVX2-NEXT:    xorb %ch, %cl
1979; AVX2-NEXT:    setnp %al
1980; AVX2-NEXT:    vzeroupper
1981; AVX2-NEXT:    retq
1982;
1983; AVX512F-LABEL: icmp0_v64i8_v64i1:
1984; AVX512F:       # %bb.0:
1985; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1986; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1987; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
1988; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
1989; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
1990; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
1991; AVX512F-NEXT:    vpxor %xmm2, %xmm3, %xmm2
1992; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1993; AVX512F-NEXT:    vpxor %xmm2, %xmm0, %xmm0
1994; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1995; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1996; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1997; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
1998; AVX512F-NEXT:    kxorw %k1, %k0, %k0
1999; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
2000; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2001; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
2002; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2003; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
2004; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2005; AVX512F-NEXT:    kmovw %k0, %eax
2006; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
2007; AVX512F-NEXT:    vzeroupper
2008; AVX512F-NEXT:    retq
2009;
2010; AVX512BW-LABEL: icmp0_v64i8_v64i1:
2011; AVX512BW:       # %bb.0:
2012; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
2013; AVX512BW-NEXT:    kmovq %k0, %rax
2014; AVX512BW-NEXT:    movq %rax, %rcx
2015; AVX512BW-NEXT:    shrq $32, %rcx
2016; AVX512BW-NEXT:    xorl %eax, %ecx
2017; AVX512BW-NEXT:    movl %ecx, %eax
2018; AVX512BW-NEXT:    shrl $16, %eax
2019; AVX512BW-NEXT:    xorl %ecx, %eax
2020; AVX512BW-NEXT:    xorb %ah, %al
2021; AVX512BW-NEXT:    setnp %al
2022; AVX512BW-NEXT:    vzeroupper
2023; AVX512BW-NEXT:    retq
2024;
2025; AVX512VL-LABEL: icmp0_v64i8_v64i1:
2026; AVX512VL:       # %bb.0:
2027; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
2028; AVX512VL-NEXT:    kmovq %k0, %rax
2029; AVX512VL-NEXT:    movq %rax, %rcx
2030; AVX512VL-NEXT:    shrq $32, %rcx
2031; AVX512VL-NEXT:    xorl %eax, %ecx
2032; AVX512VL-NEXT:    movl %ecx, %eax
2033; AVX512VL-NEXT:    shrl $16, %eax
2034; AVX512VL-NEXT:    xorl %ecx, %eax
2035; AVX512VL-NEXT:    xorb %ah, %al
2036; AVX512VL-NEXT:    setnp %al
2037; AVX512VL-NEXT:    vzeroupper
2038; AVX512VL-NEXT:    retq
2039  %a = icmp eq <64 x i8> %0, zeroinitializer
2040  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
2041  ret i1 %b
2042}
2043
;
2044; Comparison
2045;
2046
; Test icmp_v2i64_v2i1 - compares the two <2 x i64> arguments lane by lane for
; equality and XOR-reduces the resulting <2 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2047define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) nounwind {
2048; SSE2-LABEL: icmp_v2i64_v2i1:
2049; SSE2:       # %bb.0:
2050; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
2051; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2052; SSE2-NEXT:    pand %xmm0, %xmm1
2053; SSE2-NEXT:    movmskpd %xmm1, %eax
2054; SSE2-NEXT:    testb %al, %al
2055; SSE2-NEXT:    setnp %al
2056; SSE2-NEXT:    ret{{[l|q]}}
2057;
2058; SSE41-LABEL: icmp_v2i64_v2i1:
2059; SSE41:       # %bb.0:
2060; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
2061; SSE41-NEXT:    movmskpd %xmm0, %eax
2062; SSE41-NEXT:    testb %al, %al
2063; SSE41-NEXT:    setnp %al
2064; SSE41-NEXT:    retq
2065;
2066; AVX-LABEL: icmp_v2i64_v2i1:
2067; AVX:       # %bb.0:
2068; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
2069; AVX-NEXT:    vmovmskpd %xmm0, %eax
2070; AVX-NEXT:    testb %al, %al
2071; AVX-NEXT:    setnp %al
2072; AVX-NEXT:    retq
2073;
2074; AVX512F-LABEL: icmp_v2i64_v2i1:
2075; AVX512F:       # %bb.0:
2076; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2077; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2078; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2079; AVX512F-NEXT:    kmovw %k0, %eax
2080; AVX512F-NEXT:    testb $3, %al
2081; AVX512F-NEXT:    setnp %al
2082; AVX512F-NEXT:    vzeroupper
2083; AVX512F-NEXT:    retq
2084;
2085; AVX512BW-LABEL: icmp_v2i64_v2i1:
2086; AVX512BW:       # %bb.0:
2087; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2088; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2089; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2090; AVX512BW-NEXT:    kmovd %k0, %eax
2091; AVX512BW-NEXT:    testb $3, %al
2092; AVX512BW-NEXT:    setnp %al
2093; AVX512BW-NEXT:    vzeroupper
2094; AVX512BW-NEXT:    retq
2095;
2096; AVX512VL-LABEL: icmp_v2i64_v2i1:
2097; AVX512VL:       # %bb.0:
2098; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
2099; AVX512VL-NEXT:    kmovd %k0, %eax
2100; AVX512VL-NEXT:    testb %al, %al
2101; AVX512VL-NEXT:    setnp %al
2102; AVX512VL-NEXT:    retq
2103  %a = icmp eq <2 x i64> %0, %1
2104  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
2105  ret i1 %b
2106}
2107
; Test icmp_v4i32_v4i1 - compares the two <4 x i32> arguments lane by lane for
; equality and XOR-reduces the resulting <4 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2108define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) nounwind {
2109; SSE-LABEL: icmp_v4i32_v4i1:
2110; SSE:       # %bb.0:
2111; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
2112; SSE-NEXT:    movmskps %xmm0, %eax
2113; SSE-NEXT:    testb %al, %al
2114; SSE-NEXT:    setnp %al
2115; SSE-NEXT:    ret{{[l|q]}}
2116;
2117; AVX-LABEL: icmp_v4i32_v4i1:
2118; AVX:       # %bb.0:
2119; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2120; AVX-NEXT:    vmovmskps %xmm0, %eax
2121; AVX-NEXT:    testb %al, %al
2122; AVX-NEXT:    setnp %al
2123; AVX-NEXT:    retq
2124;
2125; AVX512F-LABEL: icmp_v4i32_v4i1:
2126; AVX512F:       # %bb.0:
2127; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2128; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2129; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2130; AVX512F-NEXT:    kmovw %k0, %eax
2131; AVX512F-NEXT:    testb $15, %al
2132; AVX512F-NEXT:    setnp %al
2133; AVX512F-NEXT:    vzeroupper
2134; AVX512F-NEXT:    retq
2135;
2136; AVX512BW-LABEL: icmp_v4i32_v4i1:
2137; AVX512BW:       # %bb.0:
2138; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2139; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2140; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2141; AVX512BW-NEXT:    kmovd %k0, %eax
2142; AVX512BW-NEXT:    testb $15, %al
2143; AVX512BW-NEXT:    setnp %al
2144; AVX512BW-NEXT:    vzeroupper
2145; AVX512BW-NEXT:    retq
2146;
2147; AVX512VL-LABEL: icmp_v4i32_v4i1:
2148; AVX512VL:       # %bb.0:
2149; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
2150; AVX512VL-NEXT:    kmovd %k0, %eax
2151; AVX512VL-NEXT:    testb %al, %al
2152; AVX512VL-NEXT:    setnp %al
2153; AVX512VL-NEXT:    retq
2154  %a = icmp eq <4 x i32> %0, %1
2155  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
2156  ret i1 %b
2157}
2158
; Test icmp_v8i16_v8i1 - compares the two <8 x i16> arguments lane by lane for
; equality and XOR-reduces the resulting <8 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2159define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) nounwind {
2160; SSE-LABEL: icmp_v8i16_v8i1:
2161; SSE:       # %bb.0:
2162; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
2163; SSE-NEXT:    packsswb %xmm0, %xmm0
2164; SSE-NEXT:    pmovmskb %xmm0, %eax
2165; SSE-NEXT:    testb %al, %al
2166; SSE-NEXT:    setnp %al
2167; SSE-NEXT:    ret{{[l|q]}}
2168;
2169; AVX-LABEL: icmp_v8i16_v8i1:
2170; AVX:       # %bb.0:
2171; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
2172; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2173; AVX-NEXT:    vpmovmskb %xmm0, %eax
2174; AVX-NEXT:    testb %al, %al
2175; AVX-NEXT:    setnp %al
2176; AVX-NEXT:    retq
2177;
2178; AVX512F-LABEL: icmp_v8i16_v8i1:
2179; AVX512F:       # %bb.0:
2180; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
2181; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
2182; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
2183; AVX512F-NEXT:    kmovw %k0, %eax
2184; AVX512F-NEXT:    testb %al, %al
2185; AVX512F-NEXT:    setnp %al
2186; AVX512F-NEXT:    vzeroupper
2187; AVX512F-NEXT:    retq
2188;
2189; AVX512BW-LABEL: icmp_v8i16_v8i1:
2190; AVX512BW:       # %bb.0:
2191; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2192; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2193; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
2194; AVX512BW-NEXT:    kmovd %k0, %eax
2195; AVX512BW-NEXT:    testb %al, %al
2196; AVX512BW-NEXT:    setnp %al
2197; AVX512BW-NEXT:    vzeroupper
2198; AVX512BW-NEXT:    retq
2199;
2200; AVX512VL-LABEL: icmp_v8i16_v8i1:
2201; AVX512VL:       # %bb.0:
2202; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
2203; AVX512VL-NEXT:    kmovd %k0, %eax
2204; AVX512VL-NEXT:    testb %al, %al
2205; AVX512VL-NEXT:    setnp %al
2206; AVX512VL-NEXT:    retq
2207  %a = icmp eq <8 x i16> %0, %1
2208  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
2209  ret i1 %b
2210}
2211
; Test icmp_v16i8_v16i1 - compares the two <16 x i8> arguments lane by lane for
; equality and XOR-reduces the resulting <16 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2212define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) nounwind {
2213; SSE-LABEL: icmp_v16i8_v16i1:
2214; SSE:       # %bb.0:
2215; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
2216; SSE-NEXT:    pmovmskb %xmm0, %eax
2217; SSE-NEXT:    xorb %ah, %al
2218; SSE-NEXT:    setnp %al
2219; SSE-NEXT:    ret{{[l|q]}}
2220;
2221; AVX-LABEL: icmp_v16i8_v16i1:
2222; AVX:       # %bb.0:
2223; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
2224; AVX-NEXT:    vpmovmskb %xmm0, %eax
2225; AVX-NEXT:    xorb %ah, %al
2226; AVX-NEXT:    setnp %al
2227; AVX-NEXT:    retq
2228;
2229; AVX512F-LABEL: icmp_v16i8_v16i1:
2230; AVX512F:       # %bb.0:
2231; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
2232; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
2233; AVX512F-NEXT:    xorb %ah, %al
2234; AVX512F-NEXT:    setnp %al
2235; AVX512F-NEXT:    retq
2236;
2237; AVX512BW-LABEL: icmp_v16i8_v16i1:
2238; AVX512BW:       # %bb.0:
2239; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2240; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2241; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
2242; AVX512BW-NEXT:    kmovd %k0, %eax
2243; AVX512BW-NEXT:    movl %eax, %ecx
2244; AVX512BW-NEXT:    shrl $8, %ecx
2245; AVX512BW-NEXT:    xorb %al, %cl
2246; AVX512BW-NEXT:    setnp %al
2247; AVX512BW-NEXT:    vzeroupper
2248; AVX512BW-NEXT:    retq
2249;
2250; AVX512VL-LABEL: icmp_v16i8_v16i1:
2251; AVX512VL:       # %bb.0:
2252; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
2253; AVX512VL-NEXT:    kmovd %k0, %eax
2254; AVX512VL-NEXT:    movl %eax, %ecx
2255; AVX512VL-NEXT:    shrl $8, %ecx
2256; AVX512VL-NEXT:    xorb %al, %cl
2257; AVX512VL-NEXT:    setnp %al
2258; AVX512VL-NEXT:    retq
2259  %a = icmp eq <16 x i8> %0, %1
2260  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
2261  ret i1 %b
2262}
2263
; Test icmp_v4i64_v4i1 - compares the two <4 x i64> arguments lane by lane for
; equality and XOR-reduces the resulting <4 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2264define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) nounwind {
2265; X86-SSE2-LABEL: icmp_v4i64_v4i1:
2266; X86-SSE2:       # %bb.0:
2267; X86-SSE2-NEXT:    pushl %ebp
2268; X86-SSE2-NEXT:    movl %esp, %ebp
2269; X86-SSE2-NEXT:    andl $-16, %esp
2270; X86-SSE2-NEXT:    subl $16, %esp
2271; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2272; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm1
2273; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
2274; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
2275; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2276; X86-SSE2-NEXT:    andps %xmm2, %xmm0
2277; X86-SSE2-NEXT:    movmskps %xmm0, %eax
2278; X86-SSE2-NEXT:    testb %al, %al
2279; X86-SSE2-NEXT:    setnp %al
2280; X86-SSE2-NEXT:    movl %ebp, %esp
2281; X86-SSE2-NEXT:    popl %ebp
2282; X86-SSE2-NEXT:    retl
2283;
2284; X64-SSE2-LABEL: icmp_v4i64_v4i1:
2285; X64-SSE2:       # %bb.0:
2286; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
2287; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2288; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
2289; X64-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
2290; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2291; X64-SSE2-NEXT:    andps %xmm2, %xmm0
2292; X64-SSE2-NEXT:    movmskps %xmm0, %eax
2293; X64-SSE2-NEXT:    testb %al, %al
2294; X64-SSE2-NEXT:    setnp %al
2295; X64-SSE2-NEXT:    retq
2296;
2297; SSE41-LABEL: icmp_v4i64_v4i1:
2298; SSE41:       # %bb.0:
2299; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
2300; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
2301; SSE41-NEXT:    packssdw %xmm1, %xmm0
2302; SSE41-NEXT:    movmskps %xmm0, %eax
2303; SSE41-NEXT:    testb %al, %al
2304; SSE41-NEXT:    setnp %al
2305; SSE41-NEXT:    retq
2306;
2307; AVX1-LABEL: icmp_v4i64_v4i1:
2308; AVX1:       # %bb.0:
2309; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2310; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2311; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
2312; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
2313; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2314; AVX1-NEXT:    vmovmskpd %ymm0, %eax
2315; AVX1-NEXT:    testb %al, %al
2316; AVX1-NEXT:    setnp %al
2317; AVX1-NEXT:    vzeroupper
2318; AVX1-NEXT:    retq
2319;
2320; AVX2-LABEL: icmp_v4i64_v4i1:
2321; AVX2:       # %bb.0:
2322; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
2323; AVX2-NEXT:    vmovmskpd %ymm0, %eax
2324; AVX2-NEXT:    testb %al, %al
2325; AVX2-NEXT:    setnp %al
2326; AVX2-NEXT:    vzeroupper
2327; AVX2-NEXT:    retq
2328;
2329; AVX512F-LABEL: icmp_v4i64_v4i1:
2330; AVX512F:       # %bb.0:
2331; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2332; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2333; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2334; AVX512F-NEXT:    kmovw %k0, %eax
2335; AVX512F-NEXT:    testb $15, %al
2336; AVX512F-NEXT:    setnp %al
2337; AVX512F-NEXT:    vzeroupper
2338; AVX512F-NEXT:    retq
2339;
2340; AVX512BW-LABEL: icmp_v4i64_v4i1:
2341; AVX512BW:       # %bb.0:
2342; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2343; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2344; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2345; AVX512BW-NEXT:    kmovd %k0, %eax
2346; AVX512BW-NEXT:    testb $15, %al
2347; AVX512BW-NEXT:    setnp %al
2348; AVX512BW-NEXT:    vzeroupper
2349; AVX512BW-NEXT:    retq
2350;
2351; AVX512VL-LABEL: icmp_v4i64_v4i1:
2352; AVX512VL:       # %bb.0:
2353; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
2354; AVX512VL-NEXT:    kmovd %k0, %eax
2355; AVX512VL-NEXT:    testb %al, %al
2356; AVX512VL-NEXT:    setnp %al
2357; AVX512VL-NEXT:    vzeroupper
2358; AVX512VL-NEXT:    retq
2359  %a = icmp eq <4 x i64> %0, %1
2360  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
2361  ret i1 %b
2362}
2363
; Test icmp_v8i32_v8i1 - compares the two <8 x i32> arguments lane by lane for
; equality and XOR-reduces the resulting <8 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2364define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) nounwind {
2365; X86-SSE2-LABEL: icmp_v8i32_v8i1:
2366; X86-SSE2:       # %bb.0:
2367; X86-SSE2-NEXT:    pushl %ebp
2368; X86-SSE2-NEXT:    movl %esp, %ebp
2369; X86-SSE2-NEXT:    andl $-16, %esp
2370; X86-SSE2-NEXT:    subl $16, %esp
2371; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2372; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm1
2373; X86-SSE2-NEXT:    packssdw %xmm1, %xmm0
2374; X86-SSE2-NEXT:    packsswb %xmm0, %xmm0
2375; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2376; X86-SSE2-NEXT:    testb %al, %al
2377; X86-SSE2-NEXT:    setnp %al
2378; X86-SSE2-NEXT:    movl %ebp, %esp
2379; X86-SSE2-NEXT:    popl %ebp
2380; X86-SSE2-NEXT:    retl
2381;
2382; X64-SSE-LABEL: icmp_v8i32_v8i1:
2383; X64-SSE:       # %bb.0:
2384; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm1
2385; X64-SSE-NEXT:    pcmpeqd %xmm2, %xmm0
2386; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
2387; X64-SSE-NEXT:    packsswb %xmm0, %xmm0
2388; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
2389; X64-SSE-NEXT:    testb %al, %al
2390; X64-SSE-NEXT:    setnp %al
2391; X64-SSE-NEXT:    retq
2392;
2393; AVX1-LABEL: icmp_v8i32_v8i1:
2394; AVX1:       # %bb.0:
2395; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2396; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2397; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
2398; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2399; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2400; AVX1-NEXT:    vmovmskps %ymm0, %eax
2401; AVX1-NEXT:    testb %al, %al
2402; AVX1-NEXT:    setnp %al
2403; AVX1-NEXT:    vzeroupper
2404; AVX1-NEXT:    retq
2405;
2406; AVX2-LABEL: icmp_v8i32_v8i1:
2407; AVX2:       # %bb.0:
2408; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
2409; AVX2-NEXT:    vmovmskps %ymm0, %eax
2410; AVX2-NEXT:    testb %al, %al
2411; AVX2-NEXT:    setnp %al
2412; AVX2-NEXT:    vzeroupper
2413; AVX2-NEXT:    retq
2414;
2415; AVX512F-LABEL: icmp_v8i32_v8i1:
2416; AVX512F:       # %bb.0:
2417; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2418; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2419; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2420; AVX512F-NEXT:    kmovw %k0, %eax
2421; AVX512F-NEXT:    testb %al, %al
2422; AVX512F-NEXT:    setnp %al
2423; AVX512F-NEXT:    vzeroupper
2424; AVX512F-NEXT:    retq
2425;
2426; AVX512BW-LABEL: icmp_v8i32_v8i1:
2427; AVX512BW:       # %bb.0:
2428; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2429; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2430; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2431; AVX512BW-NEXT:    kmovd %k0, %eax
2432; AVX512BW-NEXT:    testb %al, %al
2433; AVX512BW-NEXT:    setnp %al
2434; AVX512BW-NEXT:    vzeroupper
2435; AVX512BW-NEXT:    retq
2436;
2437; AVX512VL-LABEL: icmp_v8i32_v8i1:
2438; AVX512VL:       # %bb.0:
2439; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
2440; AVX512VL-NEXT:    kmovd %k0, %eax
2441; AVX512VL-NEXT:    testb %al, %al
2442; AVX512VL-NEXT:    setnp %al
2443; AVX512VL-NEXT:    vzeroupper
2444; AVX512VL-NEXT:    retq
2445  %a = icmp eq <8 x i32> %0, %1
2446  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
2447  ret i1 %b
2448}
2449
; Test icmp_v16i16_v16i1 - compares the two <16 x i16> arguments lane by lane
; for equality and XOR-reduces the resulting <16 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2450define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) nounwind {
2451; X86-SSE2-LABEL: icmp_v16i16_v16i1:
2452; X86-SSE2:       # %bb.0:
2453; X86-SSE2-NEXT:    pushl %ebp
2454; X86-SSE2-NEXT:    movl %esp, %ebp
2455; X86-SSE2-NEXT:    andl $-16, %esp
2456; X86-SSE2-NEXT:    subl $16, %esp
2457; X86-SSE2-NEXT:    pcmpeqw %xmm2, %xmm0
2458; X86-SSE2-NEXT:    pcmpeqw 8(%ebp), %xmm1
2459; X86-SSE2-NEXT:    packsswb %xmm1, %xmm0
2460; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2461; X86-SSE2-NEXT:    xorb %ah, %al
2462; X86-SSE2-NEXT:    setnp %al
2463; X86-SSE2-NEXT:    movl %ebp, %esp
2464; X86-SSE2-NEXT:    popl %ebp
2465; X86-SSE2-NEXT:    retl
2466;
2467; X64-SSE-LABEL: icmp_v16i16_v16i1:
2468; X64-SSE:       # %bb.0:
2469; X64-SSE-NEXT:    pcmpeqw %xmm3, %xmm1
2470; X64-SSE-NEXT:    pcmpeqw %xmm2, %xmm0
2471; X64-SSE-NEXT:    packsswb %xmm1, %xmm0
2472; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
2473; X64-SSE-NEXT:    xorb %ah, %al
2474; X64-SSE-NEXT:    setnp %al
2475; X64-SSE-NEXT:    retq
2476;
2477; AVX1-LABEL: icmp_v16i16_v16i1:
2478; AVX1:       # %bb.0:
2479; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2480; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2481; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
2482; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
2483; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
2484; AVX1-NEXT:    vpmovmskb %xmm0, %eax
2485; AVX1-NEXT:    xorb %ah, %al
2486; AVX1-NEXT:    setnp %al
2487; AVX1-NEXT:    vzeroupper
2488; AVX1-NEXT:    retq
2489;
2490; AVX2-LABEL: icmp_v16i16_v16i1:
2491; AVX2:       # %bb.0:
2492; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2493; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2494; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2495; AVX2-NEXT:    vpmovmskb %xmm0, %eax
2496; AVX2-NEXT:    xorb %ah, %al
2497; AVX2-NEXT:    setnp %al
2498; AVX2-NEXT:    vzeroupper
2499; AVX2-NEXT:    retq
2500;
2501; AVX512F-LABEL: icmp_v16i16_v16i1:
2502; AVX512F:       # %bb.0:
2503; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2504; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
2505; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
2506; AVX512F-NEXT:    kmovw %k0, %eax
2507; AVX512F-NEXT:    movl %eax, %ecx
2508; AVX512F-NEXT:    shrl $8, %ecx
2509; AVX512F-NEXT:    xorb %al, %cl
2510; AVX512F-NEXT:    setnp %al
2511; AVX512F-NEXT:    vzeroupper
2512; AVX512F-NEXT:    retq
2513;
2514; AVX512BW-LABEL: icmp_v16i16_v16i1:
2515; AVX512BW:       # %bb.0:
2516; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2517; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2518; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
2519; AVX512BW-NEXT:    kmovd %k0, %eax
2520; AVX512BW-NEXT:    movl %eax, %ecx
2521; AVX512BW-NEXT:    shrl $8, %ecx
2522; AVX512BW-NEXT:    xorb %al, %cl
2523; AVX512BW-NEXT:    setnp %al
2524; AVX512BW-NEXT:    vzeroupper
2525; AVX512BW-NEXT:    retq
2526;
2527; AVX512VL-LABEL: icmp_v16i16_v16i1:
2528; AVX512VL:       # %bb.0:
2529; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
2530; AVX512VL-NEXT:    kmovd %k0, %eax
2531; AVX512VL-NEXT:    movl %eax, %ecx
2532; AVX512VL-NEXT:    shrl $8, %ecx
2533; AVX512VL-NEXT:    xorb %al, %cl
2534; AVX512VL-NEXT:    setnp %al
2535; AVX512VL-NEXT:    vzeroupper
2536; AVX512VL-NEXT:    retq
2537  %a = icmp eq <16 x i16> %0, %1
2538  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
2539  ret i1 %b
2540}
2541
; Test icmp_v32i8_v32i1 - compares the two <32 x i8> arguments lane by lane for
; equality and XOR-reduces the resulting <32 x i1> mask to a single i1.
; The returned bit is therefore set when an odd number of lanes compare equal.
; The assertion comments in the body are autogenerated (see the note on the
; first line of this file); regenerate them with the script, do not hand-edit.
2542define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) nounwind {
2543; X86-SSE2-LABEL: icmp_v32i8_v32i1:
2544; X86-SSE2:       # %bb.0:
2545; X86-SSE2-NEXT:    pushl %ebp
2546; X86-SSE2-NEXT:    movl %esp, %ebp
2547; X86-SSE2-NEXT:    andl $-16, %esp
2548; X86-SSE2-NEXT:    subl $16, %esp
2549; X86-SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
2550; X86-SSE2-NEXT:    pcmpeqb 8(%ebp), %xmm1
2551; X86-SSE2-NEXT:    pxor %xmm0, %xmm1
2552; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
2553; X86-SSE2-NEXT:    xorb %ah, %al
2554; X86-SSE2-NEXT:    setnp %al
2555; X86-SSE2-NEXT:    movl %ebp, %esp
2556; X86-SSE2-NEXT:    popl %ebp
2557; X86-SSE2-NEXT:    retl
2558;
2559; X64-SSE-LABEL: icmp_v32i8_v32i1:
2560; X64-SSE:       # %bb.0:
2561; X64-SSE-NEXT:    pcmpeqb %xmm3, %xmm1
2562; X64-SSE-NEXT:    pcmpeqb %xmm2, %xmm0
2563; X64-SSE-NEXT:    pxor %xmm1, %xmm0
2564; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
2565; X64-SSE-NEXT:    xorb %ah, %al
2566; X64-SSE-NEXT:    setnp %al
2567; X64-SSE-NEXT:    retq
2568;
2569; AVX1-LABEL: icmp_v32i8_v32i1:
2570; AVX1:       # %bb.0:
2571; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2572; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2573; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
2574; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
2575; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
2576; AVX1-NEXT:    vpmovmskb %xmm0, %eax
2577; AVX1-NEXT:    xorb %ah, %al
2578; AVX1-NEXT:    setnp %al
2579; AVX1-NEXT:    vzeroupper
2580; AVX1-NEXT:    retq
2581;
2582; AVX2-LABEL: icmp_v32i8_v32i1:
2583; AVX2:       # %bb.0:
2584; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
2585; AVX2-NEXT:    vpmovmskb %ymm0, %eax
2586; AVX2-NEXT:    movl %eax, %ecx
2587; AVX2-NEXT:    shrl $16, %ecx
2588; AVX2-NEXT:    xorl %eax, %ecx
2589; AVX2-NEXT:    xorb %ch, %cl
2590; AVX2-NEXT:    setnp %al
2591; AVX2-NEXT:    vzeroupper
2592; AVX2-NEXT:    retq
2593;
2594; AVX512F-LABEL: icmp_v32i8_v32i1:
2595; AVX512F:       # %bb.0:
2596; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
2597; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
2598; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
2599; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
2600; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
2601; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
2602; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2603; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
2604; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2605; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
2606; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2607; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
2608; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2609; AVX512F-NEXT:    kmovw %k0, %eax
2610; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
2611; AVX512F-NEXT:    vzeroupper
2612; AVX512F-NEXT:    retq
2613;
2614; AVX512BW-LABEL: icmp_v32i8_v32i1:
2615; AVX512BW:       # %bb.0:
2616; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2617; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2618; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
2619; AVX512BW-NEXT:    kmovd %k0, %eax
2620; AVX512BW-NEXT:    movl %eax, %ecx
2621; AVX512BW-NEXT:    shrl $16, %ecx
2622; AVX512BW-NEXT:    xorl %eax, %ecx
2623; AVX512BW-NEXT:    xorb %ch, %cl
2624; AVX512BW-NEXT:    setnp %al
2625; AVX512BW-NEXT:    vzeroupper
2626; AVX512BW-NEXT:    retq
2627;
2628; AVX512VL-LABEL: icmp_v32i8_v32i1:
2629; AVX512VL:       # %bb.0:
2630; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
2631; AVX512VL-NEXT:    kmovd %k0, %eax
2632; AVX512VL-NEXT:    movl %eax, %ecx
2633; AVX512VL-NEXT:    shrl $16, %ecx
2634; AVX512VL-NEXT:    xorl %eax, %ecx
2635; AVX512VL-NEXT:    xorb %ch, %cl
2636; AVX512VL-NEXT:    setnp %al
2637; AVX512VL-NEXT:    vzeroupper
2638; AVX512VL-NEXT:    retq
2639  %a = icmp eq <32 x i8> %0, %1
2640  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
2641  ret i1 %b
2642}
2643
; icmp_v8i64_v8i1 - lane-wise `icmp eq` of the two <8 x i64> arguments followed
; by llvm.vector.reduce.xor over the resulting <8 x i1>; the i1 is returned.
;
; The assertion lines inside the function are autogenerated (see the NOTE on
; the first line of this file); regenerate them with the update script instead
; of editing them by hand.
;
; In every run configuration checked below, the eight compare bits are moved
; into %al (via pmovmskb / vmovmskps / kmov) and the XOR reduction is read from
; the parity flag: `testb %al, %al` followed by `setnp %al`, which yields 1
; when an odd number of mask bits are set.
2644define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) nounwind {
2645; X86-SSE2-LABEL: icmp_v8i64_v8i1:
2646; X86-SSE2:       # %bb.0:
2647; X86-SSE2-NEXT:    pushl %ebp
2648; X86-SSE2-NEXT:    movl %esp, %ebp
2649; X86-SSE2-NEXT:    andl $-16, %esp
2650; X86-SSE2-NEXT:    subl $16, %esp
2651; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2652; X86-SSE2-NEXT:    pcmpeqd 72(%ebp), %xmm3
2653; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
2654; X86-SSE2-NEXT:    pand %xmm3, %xmm4
2655; X86-SSE2-NEXT:    pcmpeqd 56(%ebp), %xmm2
2656; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
2657; X86-SSE2-NEXT:    pand %xmm2, %xmm3
2658; X86-SSE2-NEXT:    packssdw %xmm4, %xmm3
2659; X86-SSE2-NEXT:    pcmpeqd 40(%ebp), %xmm1
2660; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
2661; X86-SSE2-NEXT:    pand %xmm1, %xmm2
2662; X86-SSE2-NEXT:    pcmpeqd 24(%ebp), %xmm0
2663; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2664; X86-SSE2-NEXT:    pand %xmm0, %xmm1
2665; X86-SSE2-NEXT:    packssdw %xmm2, %xmm1
2666; X86-SSE2-NEXT:    packssdw %xmm3, %xmm1
2667; X86-SSE2-NEXT:    packsswb %xmm1, %xmm1
2668; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
2669; X86-SSE2-NEXT:    testb %al, %al
2670; X86-SSE2-NEXT:    setnp %al
2671; X86-SSE2-NEXT:    movl %ebp, %esp
2672; X86-SSE2-NEXT:    popl %ebp
2673; X86-SSE2-NEXT:    retl
2674;
2675; X64-SSE2-LABEL: icmp_v8i64_v8i1:
2676; X64-SSE2:       # %bb.0:
2677; X64-SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
2678; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
2679; X64-SSE2-NEXT:    pand %xmm3, %xmm7
2680; X64-SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
2681; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
2682; X64-SSE2-NEXT:    pand %xmm2, %xmm3
2683; X64-SSE2-NEXT:    packssdw %xmm7, %xmm3
2684; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
2685; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
2686; X64-SSE2-NEXT:    pand %xmm1, %xmm2
2687; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
2688; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2689; X64-SSE2-NEXT:    pand %xmm0, %xmm1
2690; X64-SSE2-NEXT:    packssdw %xmm2, %xmm1
2691; X64-SSE2-NEXT:    packssdw %xmm3, %xmm1
2692; X64-SSE2-NEXT:    packsswb %xmm1, %xmm1
2693; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
2694; X64-SSE2-NEXT:    testb %al, %al
2695; X64-SSE2-NEXT:    setnp %al
2696; X64-SSE2-NEXT:    retq
2697;
2698; SSE41-LABEL: icmp_v8i64_v8i1:
2699; SSE41:       # %bb.0:
2700; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
2701; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
2702; SSE41-NEXT:    packssdw %xmm3, %xmm2
2703; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
2704; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
2705; SSE41-NEXT:    packssdw %xmm1, %xmm0
2706; SSE41-NEXT:    packssdw %xmm2, %xmm0
2707; SSE41-NEXT:    packsswb %xmm0, %xmm0
2708; SSE41-NEXT:    pmovmskb %xmm0, %eax
2709; SSE41-NEXT:    testb %al, %al
2710; SSE41-NEXT:    setnp %al
2711; SSE41-NEXT:    retq
2712;
2713; AVX1-LABEL: icmp_v8i64_v8i1:
2714; AVX1:       # %bb.0:
2715; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
2716; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
2717; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
2718; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
2719; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
2720; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
2721; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
2722; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
2723; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
2724; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
2725; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2726; AVX1-NEXT:    vmovmskps %ymm0, %eax
2727; AVX1-NEXT:    testb %al, %al
2728; AVX1-NEXT:    setnp %al
2729; AVX1-NEXT:    vzeroupper
2730; AVX1-NEXT:    retq
2731;
2732; AVX2-LABEL: icmp_v8i64_v8i1:
2733; AVX2:       # %bb.0:
2734; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
2735; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
2736; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2737; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2738; AVX2-NEXT:    vmovmskps %ymm0, %eax
2739; AVX2-NEXT:    testb %al, %al
2740; AVX2-NEXT:    setnp %al
2741; AVX2-NEXT:    vzeroupper
2742; AVX2-NEXT:    retq
2743;
2744; AVX512F-LABEL: icmp_v8i64_v8i1:
2745; AVX512F:       # %bb.0:
2746; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2747; AVX512F-NEXT:    kmovw %k0, %eax
2748; AVX512F-NEXT:    testb %al, %al
2749; AVX512F-NEXT:    setnp %al
2750; AVX512F-NEXT:    vzeroupper
2751; AVX512F-NEXT:    retq
2752;
2753; AVX512BW-LABEL: icmp_v8i64_v8i1:
2754; AVX512BW:       # %bb.0:
2755; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2756; AVX512BW-NEXT:    kmovd %k0, %eax
2757; AVX512BW-NEXT:    testb %al, %al
2758; AVX512BW-NEXT:    setnp %al
2759; AVX512BW-NEXT:    vzeroupper
2760; AVX512BW-NEXT:    retq
2761;
2762; AVX512VL-LABEL: icmp_v8i64_v8i1:
2763; AVX512VL:       # %bb.0:
2764; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2765; AVX512VL-NEXT:    kmovd %k0, %eax
2766; AVX512VL-NEXT:    testb %al, %al
2767; AVX512VL-NEXT:    setnp %al
2768; AVX512VL-NEXT:    vzeroupper
2769; AVX512VL-NEXT:    retq
2770  %a = icmp eq <8 x i64> %0, %1
2771  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
2772  ret i1 %b
2773}
2774
; icmp_v16i32_v16i1 - lane-wise `icmp eq` of the two <16 x i32> arguments
; followed by llvm.vector.reduce.xor over the resulting <16 x i1>; the i1 is
; returned.
;
; The assertion lines inside the function are autogenerated (see the NOTE on
; the first line of this file); regenerate them with the update script instead
; of editing them by hand.
;
; In every run configuration checked below, the sixteen compare bits reach a
; general-purpose register, the high byte is XORed into the low byte
; (`xorb %ah, %al` on the SSE/AVX runs, `shrl $8` + `xorb %al, %cl` on the
; AVX-512 runs) and the reduction is then read from the parity flag with
; `setnp %al`.
2775define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) nounwind {
2776; X86-SSE2-LABEL: icmp_v16i32_v16i1:
2777; X86-SSE2:       # %bb.0:
2778; X86-SSE2-NEXT:    pushl %ebp
2779; X86-SSE2-NEXT:    movl %esp, %ebp
2780; X86-SSE2-NEXT:    andl $-16, %esp
2781; X86-SSE2-NEXT:    subl $16, %esp
2782; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2783; X86-SSE2-NEXT:    pcmpeqd 72(%ebp), %xmm3
2784; X86-SSE2-NEXT:    pcmpeqd 56(%ebp), %xmm2
2785; X86-SSE2-NEXT:    packssdw %xmm3, %xmm2
2786; X86-SSE2-NEXT:    pcmpeqd 40(%ebp), %xmm1
2787; X86-SSE2-NEXT:    pcmpeqd 24(%ebp), %xmm0
2788; X86-SSE2-NEXT:    packssdw %xmm1, %xmm0
2789; X86-SSE2-NEXT:    packsswb %xmm2, %xmm0
2790; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2791; X86-SSE2-NEXT:    xorb %ah, %al
2792; X86-SSE2-NEXT:    setnp %al
2793; X86-SSE2-NEXT:    movl %ebp, %esp
2794; X86-SSE2-NEXT:    popl %ebp
2795; X86-SSE2-NEXT:    retl
2796;
2797; X64-SSE-LABEL: icmp_v16i32_v16i1:
2798; X64-SSE:       # %bb.0:
2799; X64-SSE-NEXT:    pcmpeqd %xmm7, %xmm3
2800; X64-SSE-NEXT:    pcmpeqd %xmm6, %xmm2
2801; X64-SSE-NEXT:    packssdw %xmm3, %xmm2
2802; X64-SSE-NEXT:    pcmpeqd %xmm5, %xmm1
2803; X64-SSE-NEXT:    pcmpeqd %xmm4, %xmm0
2804; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
2805; X64-SSE-NEXT:    packsswb %xmm2, %xmm0
2806; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
2807; X64-SSE-NEXT:    xorb %ah, %al
2808; X64-SSE-NEXT:    setnp %al
2809; X64-SSE-NEXT:    retq
2810;
2811; AVX1-LABEL: icmp_v16i32_v16i1:
2812; AVX1:       # %bb.0:
2813; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
2814; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
2815; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
2816; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
2817; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
2818; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
2819; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
2820; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
2821; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
2822; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
2823; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2824; AVX1-NEXT:    vpmovmskb %xmm0, %eax
2825; AVX1-NEXT:    xorb %ah, %al
2826; AVX1-NEXT:    setnp %al
2827; AVX1-NEXT:    vzeroupper
2828; AVX1-NEXT:    retq
2829;
2830; AVX2-LABEL: icmp_v16i32_v16i1:
2831; AVX2:       # %bb.0:
2832; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
2833; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
2834; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2835; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2836; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2837; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2838; AVX2-NEXT:    vpmovmskb %xmm0, %eax
2839; AVX2-NEXT:    xorb %ah, %al
2840; AVX2-NEXT:    setnp %al
2841; AVX2-NEXT:    vzeroupper
2842; AVX2-NEXT:    retq
2843;
2844; AVX512F-LABEL: icmp_v16i32_v16i1:
2845; AVX512F:       # %bb.0:
2846; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2847; AVX512F-NEXT:    kmovw %k0, %eax
2848; AVX512F-NEXT:    movl %eax, %ecx
2849; AVX512F-NEXT:    shrl $8, %ecx
2850; AVX512F-NEXT:    xorb %al, %cl
2851; AVX512F-NEXT:    setnp %al
2852; AVX512F-NEXT:    vzeroupper
2853; AVX512F-NEXT:    retq
2854;
2855; AVX512BW-LABEL: icmp_v16i32_v16i1:
2856; AVX512BW:       # %bb.0:
2857; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2858; AVX512BW-NEXT:    kmovd %k0, %eax
2859; AVX512BW-NEXT:    movl %eax, %ecx
2860; AVX512BW-NEXT:    shrl $8, %ecx
2861; AVX512BW-NEXT:    xorb %al, %cl
2862; AVX512BW-NEXT:    setnp %al
2863; AVX512BW-NEXT:    vzeroupper
2864; AVX512BW-NEXT:    retq
2865;
2866; AVX512VL-LABEL: icmp_v16i32_v16i1:
2867; AVX512VL:       # %bb.0:
2868; AVX512VL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2869; AVX512VL-NEXT:    kmovd %k0, %eax
2870; AVX512VL-NEXT:    movl %eax, %ecx
2871; AVX512VL-NEXT:    shrl $8, %ecx
2872; AVX512VL-NEXT:    xorb %al, %cl
2873; AVX512VL-NEXT:    setnp %al
2874; AVX512VL-NEXT:    vzeroupper
2875; AVX512VL-NEXT:    retq
2876  %a = icmp eq <16 x i32> %0, %1
2877  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
2878  ret i1 %b
2879}
2880
; icmp_v32i16_v32i1 - lane-wise `icmp eq` of the two <32 x i16> arguments
; followed by llvm.vector.reduce.xor over the resulting <32 x i1>; the i1 is
; returned.
;
; The assertion lines inside the function are autogenerated (see the NOTE on
; the first line of this file); regenerate them with the update script instead
; of editing them by hand.
;
; Shapes of the expected code, as visible in the assertions below:
;  * SSE and AVX1 runs: the compare results are XORed pairwise in vector
;    registers first, packed to bytes, moved out with (v)pmovmskb, folded with
;    `xorb %ah, %al` and finished with `setnp %al`.
;  * AVX2 run and the AVX-512 runs that include avx512bw: a 32-bit mask is
;    produced, then folded with `shrl $16` / `xorl` / `xorb %ch, %cl` before
;    `setnp %al`.
;  * The avx512f-only run: the two 256-bit compare halves are XORed, widened
;    with vpmovsxwd, tested into %k0, and reduced in the mask register with a
;    kshiftrw/kxorw ladder (shifts of 8, 4, 2, 1) before kmovw.
2881define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) nounwind {
2882; X86-SSE2-LABEL: icmp_v32i16_v32i1:
2883; X86-SSE2:       # %bb.0:
2884; X86-SSE2-NEXT:    pushl %ebp
2885; X86-SSE2-NEXT:    movl %esp, %ebp
2886; X86-SSE2-NEXT:    andl $-16, %esp
2887; X86-SSE2-NEXT:    subl $16, %esp
2888; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2889; X86-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
2890; X86-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
2891; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
2892; X86-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
2893; X86-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
2894; X86-SSE2-NEXT:    pxor %xmm3, %xmm1
2895; X86-SSE2-NEXT:    packsswb %xmm1, %xmm0
2896; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2897; X86-SSE2-NEXT:    xorb %ah, %al
2898; X86-SSE2-NEXT:    setnp %al
2899; X86-SSE2-NEXT:    movl %ebp, %esp
2900; X86-SSE2-NEXT:    popl %ebp
2901; X86-SSE2-NEXT:    retl
2902;
2903; X64-SSE-LABEL: icmp_v32i16_v32i1:
2904; X64-SSE:       # %bb.0:
2905; X64-SSE-NEXT:    pcmpeqw %xmm6, %xmm2
2906; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm0
2907; X64-SSE-NEXT:    pxor %xmm2, %xmm0
2908; X64-SSE-NEXT:    pcmpeqw %xmm7, %xmm3
2909; X64-SSE-NEXT:    pcmpeqw %xmm5, %xmm1
2910; X64-SSE-NEXT:    pxor %xmm3, %xmm1
2911; X64-SSE-NEXT:    packsswb %xmm1, %xmm0
2912; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
2913; X64-SSE-NEXT:    xorb %ah, %al
2914; X64-SSE-NEXT:    setnp %al
2915; X64-SSE-NEXT:    retq
2916;
2917; AVX1-LABEL: icmp_v32i16_v32i1:
2918; AVX1:       # %bb.0:
2919; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm4
2920; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm5
2921; AVX1-NEXT:    vpxor %xmm4, %xmm5, %xmm4
2922; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
2923; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
2924; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
2925; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
2926; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2927; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
2928; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
2929; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm0
2930; AVX1-NEXT:    vpmovmskb %xmm0, %eax
2931; AVX1-NEXT:    xorb %ah, %al
2932; AVX1-NEXT:    setnp %al
2933; AVX1-NEXT:    vzeroupper
2934; AVX1-NEXT:    retq
2935;
2936; AVX2-LABEL: icmp_v32i16_v32i1:
2937; AVX2:       # %bb.0:
2938; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
2939; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
2940; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
2941; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2942; AVX2-NEXT:    vpmovmskb %ymm0, %eax
2943; AVX2-NEXT:    movl %eax, %ecx
2944; AVX2-NEXT:    shrl $16, %ecx
2945; AVX2-NEXT:    xorl %eax, %ecx
2946; AVX2-NEXT:    xorb %ch, %cl
2947; AVX2-NEXT:    setnp %al
2948; AVX2-NEXT:    vzeroupper
2949; AVX2-NEXT:    retq
2950;
2951; AVX512F-LABEL: icmp_v32i16_v32i1:
2952; AVX512F:       # %bb.0:
2953; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2954; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2955; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
2956; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2957; AVX512F-NEXT:    vpxor %ymm2, %ymm0, %ymm0
2958; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
2959; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
2960; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
2961; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2962; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
2963; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2964; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
2965; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2966; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
2967; AVX512F-NEXT:    kxorw %k1, %k0, %k0
2968; AVX512F-NEXT:    kmovw %k0, %eax
2969; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
2970; AVX512F-NEXT:    vzeroupper
2971; AVX512F-NEXT:    retq
2972;
2973; AVX512BW-LABEL: icmp_v32i16_v32i1:
2974; AVX512BW:       # %bb.0:
2975; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
2976; AVX512BW-NEXT:    kmovd %k0, %eax
2977; AVX512BW-NEXT:    movl %eax, %ecx
2978; AVX512BW-NEXT:    shrl $16, %ecx
2979; AVX512BW-NEXT:    xorl %eax, %ecx
2980; AVX512BW-NEXT:    xorb %ch, %cl
2981; AVX512BW-NEXT:    setnp %al
2982; AVX512BW-NEXT:    vzeroupper
2983; AVX512BW-NEXT:    retq
2984;
2985; AVX512VL-LABEL: icmp_v32i16_v32i1:
2986; AVX512VL:       # %bb.0:
2987; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
2988; AVX512VL-NEXT:    kmovd %k0, %eax
2989; AVX512VL-NEXT:    movl %eax, %ecx
2990; AVX512VL-NEXT:    shrl $16, %ecx
2991; AVX512VL-NEXT:    xorl %eax, %ecx
2992; AVX512VL-NEXT:    xorb %ch, %cl
2993; AVX512VL-NEXT:    setnp %al
2994; AVX512VL-NEXT:    vzeroupper
2995; AVX512VL-NEXT:    retq
2996  %a = icmp eq <32 x i16> %0, %1
2997  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
2998  ret i1 %b
2999}
3000
; icmp_v64i8_v64i1 - lane-wise `icmp eq` of the two <64 x i8> arguments
; followed by llvm.vector.reduce.xor over the resulting <64 x i1>; the i1 is
; returned.
;
; The assertion lines inside the function are autogenerated (see the NOTE on
; the first line of this file); regenerate them with the update script instead
; of editing them by hand.
;
; Shapes of the expected code, as visible in the assertions below:
;  * SSE and AVX1 runs: all four 128-bit compare results are XORed together in
;    vector registers, moved out with (v)pmovmskb, folded with
;    `xorb %ah, %al` and finished with `setnp %al`.
;  * AVX2 run: the two 256-bit compare results are XORed, the 32-bit mask is
;    folded with `shrl $16` / `xorl` / `xorb %ch, %cl`, then `setnp %al`.
;  * The avx512f-only run: the compare results are XORed down to one xmm
;    register, widened with vpmovsxbd, tested into %k0, and reduced with a
;    kshiftrw/kxorw ladder (shifts of 8, 4, 2, 1) before kmovw.
;  * AVX-512 runs that include avx512bw: a 64-bit mask is read with kmovq and
;    folded with `shrq $32` / `xorl`, `shrl $16` / `xorl`, `xorb %ah, %al`,
;    then `setnp %al`.
;
; NOTE(review): the avx512f-only sequence contains `vpslld $31` between
; vpmovsxbd and vptestmd, which the otherwise similar avx512f-only sequence
; for the <32 x i16> test does not have. It looks redundant on sign-extended
; all-ones/all-zeros lanes - presumably a missed codegen optimisation; confirm
; before relying on it. It reflects current llc output, so do not remove it
; from the assertions by hand.
3001define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) nounwind {
3002; X86-SSE2-LABEL: icmp_v64i8_v64i1:
3003; X86-SSE2:       # %bb.0:
3004; X86-SSE2-NEXT:    pushl %ebp
3005; X86-SSE2-NEXT:    movl %esp, %ebp
3006; X86-SSE2-NEXT:    andl $-16, %esp
3007; X86-SSE2-NEXT:    subl $16, %esp
3008; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
3009; X86-SSE2-NEXT:    pcmpeqb 56(%ebp), %xmm2
3010; X86-SSE2-NEXT:    pcmpeqb 24(%ebp), %xmm0
3011; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
3012; X86-SSE2-NEXT:    pcmpeqb 72(%ebp), %xmm3
3013; X86-SSE2-NEXT:    pcmpeqb 40(%ebp), %xmm1
3014; X86-SSE2-NEXT:    pxor %xmm3, %xmm1
3015; X86-SSE2-NEXT:    pxor %xmm0, %xmm1
3016; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
3017; X86-SSE2-NEXT:    xorb %ah, %al
3018; X86-SSE2-NEXT:    setnp %al
3019; X86-SSE2-NEXT:    movl %ebp, %esp
3020; X86-SSE2-NEXT:    popl %ebp
3021; X86-SSE2-NEXT:    retl
3022;
3023; X64-SSE-LABEL: icmp_v64i8_v64i1:
3024; X64-SSE:       # %bb.0:
3025; X64-SSE-NEXT:    pcmpeqb %xmm6, %xmm2
3026; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm0
3027; X64-SSE-NEXT:    pxor %xmm2, %xmm0
3028; X64-SSE-NEXT:    pcmpeqb %xmm7, %xmm3
3029; X64-SSE-NEXT:    pcmpeqb %xmm5, %xmm1
3030; X64-SSE-NEXT:    pxor %xmm3, %xmm1
3031; X64-SSE-NEXT:    pxor %xmm0, %xmm1
3032; X64-SSE-NEXT:    pmovmskb %xmm1, %eax
3033; X64-SSE-NEXT:    xorb %ah, %al
3034; X64-SSE-NEXT:    setnp %al
3035; X64-SSE-NEXT:    retq
3036;
3037; AVX1-LABEL: icmp_v64i8_v64i1:
3038; AVX1:       # %bb.0:
3039; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
3040; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
3041; AVX1-NEXT:    vpxor %xmm4, %xmm5, %xmm4
3042; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
3043; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
3044; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
3045; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
3046; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3047; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
3048; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
3049; AVX1-NEXT:    vpxor %xmm0, %xmm4, %xmm0
3050; AVX1-NEXT:    vpmovmskb %xmm0, %eax
3051; AVX1-NEXT:    xorb %ah, %al
3052; AVX1-NEXT:    setnp %al
3053; AVX1-NEXT:    vzeroupper
3054; AVX1-NEXT:    retq
3055;
3056; AVX2-LABEL: icmp_v64i8_v64i1:
3057; AVX2:       # %bb.0:
3058; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
3059; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
3060; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
3061; AVX2-NEXT:    vpmovmskb %ymm0, %eax
3062; AVX2-NEXT:    movl %eax, %ecx
3063; AVX2-NEXT:    shrl $16, %ecx
3064; AVX2-NEXT:    xorl %eax, %ecx
3065; AVX2-NEXT:    xorb %ch, %cl
3066; AVX2-NEXT:    setnp %al
3067; AVX2-NEXT:    vzeroupper
3068; AVX2-NEXT:    retq
3069;
3070; AVX512F-LABEL: icmp_v64i8_v64i1:
3071; AVX512F:       # %bb.0:
3072; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
3073; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
3074; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
3075; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
3076; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
3077; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
3078; AVX512F-NEXT:    vpxor %xmm1, %xmm3, %xmm1
3079; AVX512F-NEXT:    vpxor %xmm2, %xmm0, %xmm0
3080; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
3081; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
3082; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
3083; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
3084; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
3085; AVX512F-NEXT:    kxorw %k1, %k0, %k0
3086; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
3087; AVX512F-NEXT:    kxorw %k1, %k0, %k0
3088; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
3089; AVX512F-NEXT:    kxorw %k1, %k0, %k0
3090; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
3091; AVX512F-NEXT:    kxorw %k1, %k0, %k0
3092; AVX512F-NEXT:    kmovw %k0, %eax
3093; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
3094; AVX512F-NEXT:    vzeroupper
3095; AVX512F-NEXT:    retq
3096;
3097; AVX512BW-LABEL: icmp_v64i8_v64i1:
3098; AVX512BW:       # %bb.0:
3099; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
3100; AVX512BW-NEXT:    kmovq %k0, %rax
3101; AVX512BW-NEXT:    movq %rax, %rcx
3102; AVX512BW-NEXT:    shrq $32, %rcx
3103; AVX512BW-NEXT:    xorl %eax, %ecx
3104; AVX512BW-NEXT:    movl %ecx, %eax
3105; AVX512BW-NEXT:    shrl $16, %eax
3106; AVX512BW-NEXT:    xorl %ecx, %eax
3107; AVX512BW-NEXT:    xorb %ah, %al
3108; AVX512BW-NEXT:    setnp %al
3109; AVX512BW-NEXT:    vzeroupper
3110; AVX512BW-NEXT:    retq
3111;
3112; AVX512VL-LABEL: icmp_v64i8_v64i1:
3113; AVX512VL:       # %bb.0:
3114; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
3115; AVX512VL-NEXT:    kmovq %k0, %rax
3116; AVX512VL-NEXT:    movq %rax, %rcx
3117; AVX512VL-NEXT:    shrq $32, %rcx
3118; AVX512VL-NEXT:    xorl %eax, %ecx
3119; AVX512VL-NEXT:    movl %ecx, %eax
3120; AVX512VL-NEXT:    shrl $16, %eax
3121; AVX512VL-NEXT:    xorl %ecx, %eax
3122; AVX512VL-NEXT:    xorb %ah, %al
3123; AVX512VL-NEXT:    setnp %al
3124; AVX512VL-NEXT:    vzeroupper
3125; AVX512VL-NEXT:    retq
3126  %a = icmp eq <64 x i8> %0, %1
3127  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
3128  ret i1 %b
3129}
3130
; Declarations of the llvm.vector.reduce.xor intrinsic on <N x i1> vectors for
; N = 2, 4, 8, 16, 32 and 64. Each takes one mask vector and returns a single
; i1.
3131declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
3132declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
3133declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
3134declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
3135declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
3136declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
3137