xref: /llvm-project/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll (revision 44f316811016e677ca3e6c6237619e71bae28986)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
10
11;
12; Truncate
13;
14
; trunc_v2i64_v2i1 - truncates both i64 lanes of the argument to i1 and
; returns the AND-reduction of the resulting <2 x i1>.
; The assertions below are autogenerated (see the NOTE line at the top of the
; file); regenerate them with that script instead of editing by hand.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - psllq $63 + movmskpd, then the mask is compared with 3.
;   * SSE4.1, AVX1/AVX2, AVX512F, AVX512BW runs - (v)ptest against a
;     constant-pool operand, result read with setb.
;   * AVX512VL run - same test, with the constant built by vpbroadcastq [1,1].
15define i1 @trunc_v2i64_v2i1(<2 x i64>) nounwind {
16; SSE2-LABEL: trunc_v2i64_v2i1:
17; SSE2:       # %bb.0:
18; SSE2-NEXT:    psllq $63, %xmm0
19; SSE2-NEXT:    movmskpd %xmm0, %eax
20; SSE2-NEXT:    cmpl $3, %eax
21; SSE2-NEXT:    sete %al
22; SSE2-NEXT:    ret{{[l|q]}}
23;
24; SSE41-LABEL: trunc_v2i64_v2i1:
25; SSE41:       # %bb.0:
26; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27; SSE41-NEXT:    setb %al
28; SSE41-NEXT:    retq
29;
30; AVX1OR2-LABEL: trunc_v2i64_v2i1:
31; AVX1OR2:       # %bb.0:
32; AVX1OR2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33; AVX1OR2-NEXT:    setb %al
34; AVX1OR2-NEXT:    retq
35;
36; AVX512F-LABEL: trunc_v2i64_v2i1:
37; AVX512F:       # %bb.0:
38; AVX512F-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
39; AVX512F-NEXT:    setb %al
40; AVX512F-NEXT:    retq
41;
42; AVX512BW-LABEL: trunc_v2i64_v2i1:
43; AVX512BW:       # %bb.0:
44; AVX512BW-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
45; AVX512BW-NEXT:    setb %al
46; AVX512BW-NEXT:    retq
47;
48; AVX512VL-LABEL: trunc_v2i64_v2i1:
49; AVX512VL:       # %bb.0:
50; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
51; AVX512VL-NEXT:    vptest %xmm1, %xmm0
52; AVX512VL-NEXT:    setb %al
53; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
54  %a = trunc <2 x i64> %0 to <2 x i1>
55  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
56  ret i1 %b
57}
58
58
; trunc_v4i32_v4i1 - truncates the four i32 lanes of the argument to i1 and
; returns the AND-reduction of the resulting <4 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - pslld $31 + movmskps, mask xor-ed with 15 and tested by sete.
;   * SSE4.1, AVX1/AVX2, AVX512F, AVX512BW runs - (v)ptest against a
;     constant-pool operand, result read with setb.
;   * AVX512VL run - constant built by vpbroadcastq of 4294967297
;     (0x0000000100000001, i.e. a 1 in each 32-bit lane).
59define i1 @trunc_v4i32_v4i1(<4 x i32>) nounwind {
60; SSE2-LABEL: trunc_v4i32_v4i1:
61; SSE2:       # %bb.0:
62; SSE2-NEXT:    pslld $31, %xmm0
63; SSE2-NEXT:    movmskps %xmm0, %eax
64; SSE2-NEXT:    xorl $15, %eax
65; SSE2-NEXT:    sete %al
66; SSE2-NEXT:    ret{{[l|q]}}
67;
68; SSE41-LABEL: trunc_v4i32_v4i1:
69; SSE41:       # %bb.0:
70; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
71; SSE41-NEXT:    setb %al
72; SSE41-NEXT:    retq
73;
74; AVX1OR2-LABEL: trunc_v4i32_v4i1:
75; AVX1OR2:       # %bb.0:
76; AVX1OR2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
77; AVX1OR2-NEXT:    setb %al
78; AVX1OR2-NEXT:    retq
79;
80; AVX512F-LABEL: trunc_v4i32_v4i1:
81; AVX512F:       # %bb.0:
82; AVX512F-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
83; AVX512F-NEXT:    setb %al
84; AVX512F-NEXT:    retq
85;
86; AVX512BW-LABEL: trunc_v4i32_v4i1:
87; AVX512BW:       # %bb.0:
88; AVX512BW-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
89; AVX512BW-NEXT:    setb %al
90; AVX512BW-NEXT:    retq
91;
92; AVX512VL-LABEL: trunc_v4i32_v4i1:
93; AVX512VL:       # %bb.0:
94; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
95; AVX512VL-NEXT:    vptest %xmm1, %xmm0
96; AVX512VL-NEXT:    setb %al
97; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
98  %a = trunc <4 x i32> %0 to <4 x i1>
99  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
100  ret i1 %b
101}
102
102
; trunc_v8i16_v8i1 - truncates the eight i16 lanes of the argument to i1 and
; returns the AND-reduction of the resulting <8 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - psllw $7 + pmovmskb, then notl and testl with 0x5555 so only
;     the mask bit of each word's low byte is examined.
;   * SSE4.1, AVX1/AVX2, AVX512F, AVX512BW runs - (v)ptest against a
;     constant-pool operand, result read with setb.
;   * AVX512VL run - constant built by vpbroadcastq of 281479271743489
;     (0x0001000100010001, i.e. a 1 in each 16-bit lane).
103define i1 @trunc_v8i16_v8i1(<8 x i16>) nounwind {
104; SSE2-LABEL: trunc_v8i16_v8i1:
105; SSE2:       # %bb.0:
106; SSE2-NEXT:    psllw $7, %xmm0
107; SSE2-NEXT:    pmovmskb %xmm0, %eax
108; SSE2-NEXT:    notl %eax
109; SSE2-NEXT:    testl $21845, %eax # imm = 0x5555
110; SSE2-NEXT:    sete %al
111; SSE2-NEXT:    ret{{[l|q]}}
112;
113; SSE41-LABEL: trunc_v8i16_v8i1:
114; SSE41:       # %bb.0:
115; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
116; SSE41-NEXT:    setb %al
117; SSE41-NEXT:    retq
118;
119; AVX1OR2-LABEL: trunc_v8i16_v8i1:
120; AVX1OR2:       # %bb.0:
121; AVX1OR2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
122; AVX1OR2-NEXT:    setb %al
123; AVX1OR2-NEXT:    retq
124;
125; AVX512F-LABEL: trunc_v8i16_v8i1:
126; AVX512F:       # %bb.0:
127; AVX512F-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
128; AVX512F-NEXT:    setb %al
129; AVX512F-NEXT:    retq
130;
131; AVX512BW-LABEL: trunc_v8i16_v8i1:
132; AVX512BW:       # %bb.0:
133; AVX512BW-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
134; AVX512BW-NEXT:    setb %al
135; AVX512BW-NEXT:    retq
136;
137; AVX512VL-LABEL: trunc_v8i16_v8i1:
138; AVX512VL:       # %bb.0:
139; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
140; AVX512VL-NEXT:    vptest %xmm1, %xmm0
141; AVX512VL-NEXT:    setb %al
142; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
143  %a = trunc <8 x i16> %0 to <8 x i1>
144  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
145  ret i1 %b
146}
147
147
; trunc_v16i8_v16i1 - truncates the sixteen i8 lanes of the argument to i1 and
; returns the AND-reduction of the resulting <16 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - psllw $7 + pmovmskb, mask xor-ed with 0xFFFF and tested by
;     sete.
;   * SSE4.1, AVX1/AVX2, AVX512F, AVX512BW runs - (v)ptest against a
;     constant-pool operand, result read with setb.
;   * AVX512VL run - constant built by vpbroadcastq of 72340172838076673
;     (0x0101010101010101, i.e. a 1 in each byte).
148define i1 @trunc_v16i8_v16i1(<16 x i8>) nounwind {
149; SSE2-LABEL: trunc_v16i8_v16i1:
150; SSE2:       # %bb.0:
151; SSE2-NEXT:    psllw $7, %xmm0
152; SSE2-NEXT:    pmovmskb %xmm0, %eax
153; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
154; SSE2-NEXT:    sete %al
155; SSE2-NEXT:    ret{{[l|q]}}
156;
157; SSE41-LABEL: trunc_v16i8_v16i1:
158; SSE41:       # %bb.0:
159; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160; SSE41-NEXT:    setb %al
161; SSE41-NEXT:    retq
162;
163; AVX1OR2-LABEL: trunc_v16i8_v16i1:
164; AVX1OR2:       # %bb.0:
165; AVX1OR2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166; AVX1OR2-NEXT:    setb %al
167; AVX1OR2-NEXT:    retq
168;
169; AVX512F-LABEL: trunc_v16i8_v16i1:
170; AVX512F:       # %bb.0:
171; AVX512F-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172; AVX512F-NEXT:    setb %al
173; AVX512F-NEXT:    retq
174;
175; AVX512BW-LABEL: trunc_v16i8_v16i1:
176; AVX512BW:       # %bb.0:
177; AVX512BW-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
178; AVX512BW-NEXT:    setb %al
179; AVX512BW-NEXT:    retq
180;
181; AVX512VL-LABEL: trunc_v16i8_v16i1:
182; AVX512VL:       # %bb.0:
183; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
184; AVX512VL-NEXT:    vptest %xmm1, %xmm0
185; AVX512VL-NEXT:    setb %al
186; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
187  %a = trunc <16 x i8> %0 to <16 x i1>
188  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
189  ret i1 %b
190}
191
191
; trunc_v4i64_v4i1 - 256-bit variant: truncates the four i64 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <4 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - shufps picks elements [0,2] of xmm0 and xmm1, then
;     pslld $31 + movmskps and a compare of the mask with 15.
;   * SSE4.1 run - the two 128-bit halves are pand-ed before a single ptest.
;   * AVX1 run - one ymm vptest against a constant-pool operand.
;   * AVX2 and AVX512 runs - vpbroadcastq [1,1,1,1] feeding a ymm vptest.
;   All ymm sequences end with vzeroupper before returning.
192define i1 @trunc_v4i64_v4i1(<4 x i64>) nounwind {
193; SSE2-LABEL: trunc_v4i64_v4i1:
194; SSE2:       # %bb.0:
195; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
196; SSE2-NEXT:    pslld $31, %xmm0
197; SSE2-NEXT:    movmskps %xmm0, %eax
198; SSE2-NEXT:    cmpl $15, %eax
199; SSE2-NEXT:    sete %al
200; SSE2-NEXT:    ret{{[l|q]}}
201;
202; SSE41-LABEL: trunc_v4i64_v4i1:
203; SSE41:       # %bb.0:
204; SSE41-NEXT:    pand %xmm1, %xmm0
205; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
206; SSE41-NEXT:    setb %al
207; SSE41-NEXT:    retq
208;
209; AVX1-LABEL: trunc_v4i64_v4i1:
210; AVX1:       # %bb.0:
211; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
212; AVX1-NEXT:    setb %al
213; AVX1-NEXT:    vzeroupper
214; AVX1-NEXT:    retq
215;
216; AVX2-LABEL: trunc_v4i64_v4i1:
217; AVX2:       # %bb.0:
218; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
219; AVX2-NEXT:    vptest %ymm1, %ymm0
220; AVX2-NEXT:    setb %al
221; AVX2-NEXT:    vzeroupper
222; AVX2-NEXT:    retq
223;
224; AVX512-LABEL: trunc_v4i64_v4i1:
225; AVX512:       # %bb.0:
226; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
227; AVX512-NEXT:    vptest %ymm1, %ymm0
228; AVX512-NEXT:    setb %al
229; AVX512-NEXT:    vzeroupper
230; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
231  %a = trunc <4 x i64> %0 to <4 x i1>
232  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
233  ret i1 %b
234}
235
235
; trunc_v8i32_v8i1 - 256-bit variant: truncates the eight i32 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <8 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - halves combined with pand, then pslld $31 + movmskps and the
;     mask xor-ed with 15.
;   * SSE4.1 run - halves combined with pand, then ptest against a
;     constant-pool operand.
;   * AVX1 run - one ymm vptest against a constant-pool operand.
;   * AVX2 and AVX512 runs - vpbroadcastq of 4294967297 (a 1 in each 32-bit
;     lane) feeding a ymm vptest.
236define i1 @trunc_v8i32_v8i1(<8 x i32>) nounwind {
237; SSE2-LABEL: trunc_v8i32_v8i1:
238; SSE2:       # %bb.0:
239; SSE2-NEXT:    pand %xmm1, %xmm0
240; SSE2-NEXT:    pslld $31, %xmm0
241; SSE2-NEXT:    movmskps %xmm0, %eax
242; SSE2-NEXT:    xorl $15, %eax
243; SSE2-NEXT:    sete %al
244; SSE2-NEXT:    ret{{[l|q]}}
245;
246; SSE41-LABEL: trunc_v8i32_v8i1:
247; SSE41:       # %bb.0:
248; SSE41-NEXT:    pand %xmm1, %xmm0
249; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
250; SSE41-NEXT:    setb %al
251; SSE41-NEXT:    retq
252;
253; AVX1-LABEL: trunc_v8i32_v8i1:
254; AVX1:       # %bb.0:
255; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
256; AVX1-NEXT:    setb %al
257; AVX1-NEXT:    vzeroupper
258; AVX1-NEXT:    retq
259;
260; AVX2-LABEL: trunc_v8i32_v8i1:
261; AVX2:       # %bb.0:
262; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
263; AVX2-NEXT:    vptest %ymm1, %ymm0
264; AVX2-NEXT:    setb %al
265; AVX2-NEXT:    vzeroupper
266; AVX2-NEXT:    retq
267;
268; AVX512-LABEL: trunc_v8i32_v8i1:
269; AVX512:       # %bb.0:
270; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
271; AVX512-NEXT:    vptest %ymm1, %ymm0
272; AVX512-NEXT:    setb %al
273; AVX512-NEXT:    vzeroupper
274; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
275  %a = trunc <8 x i32> %0 to <8 x i1>
276  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
277  ret i1 %b
278}
279
279
; trunc_v16i16_v16i1 - 256-bit variant: truncates the sixteen i16 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <16 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - halves combined with pand, then psllw $7 + pmovmskb, notl and
;     testl with 0x5555.
;   * SSE4.1 run - halves combined with pand, then ptest against a
;     constant-pool operand.
;   * AVX1 run - one ymm vptest against a constant-pool operand.
;   * AVX2 and AVX512 runs - vpbroadcastq of 281479271743489 (a 1 in each
;     16-bit lane) feeding a ymm vptest.
280define i1 @trunc_v16i16_v16i1(<16 x i16>) nounwind {
281; SSE2-LABEL: trunc_v16i16_v16i1:
282; SSE2:       # %bb.0:
283; SSE2-NEXT:    pand %xmm1, %xmm0
284; SSE2-NEXT:    psllw $7, %xmm0
285; SSE2-NEXT:    pmovmskb %xmm0, %eax
286; SSE2-NEXT:    notl %eax
287; SSE2-NEXT:    testl $21845, %eax # imm = 0x5555
288; SSE2-NEXT:    sete %al
289; SSE2-NEXT:    ret{{[l|q]}}
290;
291; SSE41-LABEL: trunc_v16i16_v16i1:
292; SSE41:       # %bb.0:
293; SSE41-NEXT:    pand %xmm1, %xmm0
294; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
295; SSE41-NEXT:    setb %al
296; SSE41-NEXT:    retq
297;
298; AVX1-LABEL: trunc_v16i16_v16i1:
299; AVX1:       # %bb.0:
300; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
301; AVX1-NEXT:    setb %al
302; AVX1-NEXT:    vzeroupper
303; AVX1-NEXT:    retq
304;
305; AVX2-LABEL: trunc_v16i16_v16i1:
306; AVX2:       # %bb.0:
307; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
308; AVX2-NEXT:    vptest %ymm1, %ymm0
309; AVX2-NEXT:    setb %al
310; AVX2-NEXT:    vzeroupper
311; AVX2-NEXT:    retq
312;
313; AVX512-LABEL: trunc_v16i16_v16i1:
314; AVX512:       # %bb.0:
315; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
316; AVX512-NEXT:    vptest %ymm1, %ymm0
317; AVX512-NEXT:    setb %al
318; AVX512-NEXT:    vzeroupper
319; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
320  %a = trunc <16 x i16> %0 to <16 x i1>
321  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
322  ret i1 %b
323}
324
324
; trunc_v32i8_v32i1 - 256-bit variant: truncates the thirty-two i8 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <32 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - halves combined with pand, then psllw $7 + pmovmskb and the
;     mask xor-ed with 0xFFFF.
;   * SSE4.1 run - halves combined with pand, then ptest against a
;     constant-pool operand.
;   * AVX1 run - one ymm vptest against a constant-pool operand.
;   * AVX2 and AVX512 runs - vpbroadcastq of 72340172838076673 (a 1 in each
;     byte) feeding a ymm vptest.
325define i1 @trunc_v32i8_v32i1(<32 x i8>) nounwind {
326; SSE2-LABEL: trunc_v32i8_v32i1:
327; SSE2:       # %bb.0:
328; SSE2-NEXT:    pand %xmm1, %xmm0
329; SSE2-NEXT:    psllw $7, %xmm0
330; SSE2-NEXT:    pmovmskb %xmm0, %eax
331; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
332; SSE2-NEXT:    sete %al
333; SSE2-NEXT:    ret{{[l|q]}}
334;
335; SSE41-LABEL: trunc_v32i8_v32i1:
336; SSE41:       # %bb.0:
337; SSE41-NEXT:    pand %xmm1, %xmm0
338; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339; SSE41-NEXT:    setb %al
340; SSE41-NEXT:    retq
341;
342; AVX1-LABEL: trunc_v32i8_v32i1:
343; AVX1:       # %bb.0:
344; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
345; AVX1-NEXT:    setb %al
346; AVX1-NEXT:    vzeroupper
347; AVX1-NEXT:    retq
348;
349; AVX2-LABEL: trunc_v32i8_v32i1:
350; AVX2:       # %bb.0:
351; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
352; AVX2-NEXT:    vptest %ymm1, %ymm0
353; AVX2-NEXT:    setb %al
354; AVX2-NEXT:    vzeroupper
355; AVX2-NEXT:    retq
356;
357; AVX512-LABEL: trunc_v32i8_v32i1:
358; AVX512:       # %bb.0:
359; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
360; AVX512-NEXT:    vptest %ymm1, %ymm0
361; AVX512-NEXT:    setb %al
362; AVX512-NEXT:    vzeroupper
363; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
364  %a = trunc <32 x i8> %0 to <32 x i1>
365  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
366  ret i1 %b
367}
368
368
; trunc_v8i64_v8i1 - 512-bit variant: truncates the eight i64 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <8 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * 32-bit SSE2 run - sets up an ebp frame with a 16-byte aligned stack and
;     takes one shufps operand from memory; otherwise the same narrowing chain
;     as the 64-bit SSE2 run.
;   * 64-bit SSE2 run - shufps/pslld/psrad on each register pair, packssdw,
;     psllw $15, packsswb, pmovmskb and a byte compare of the mask with -1.
;   * SSE4.1 run - three pand instructions fold the four registers before one
;     ptest against a constant-pool operand.
;   * AVX1/AVX2 runs - the two ymm halves are and-ed, then one ymm vptest.
;   * AVX512 runs - zmm broadcast of 1, vpandq, vpcmpneqd into k0, kortestw,
;     sete.
369define i1 @trunc_v8i64_v8i1(<8 x i64>) nounwind {
370; X86-SSE2-LABEL: trunc_v8i64_v8i1:
371; X86-SSE2:       # %bb.0:
372; X86-SSE2-NEXT:    pushl %ebp
373; X86-SSE2-NEXT:    movl %esp, %ebp
374; X86-SSE2-NEXT:    andl $-16, %esp
375; X86-SSE2-NEXT:    subl $16, %esp
376; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
377; X86-SSE2-NEXT:    pslld $16, %xmm0
378; X86-SSE2-NEXT:    psrad $16, %xmm0
379; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],mem[0,2]
380; X86-SSE2-NEXT:    pslld $16, %xmm2
381; X86-SSE2-NEXT:    psrad $16, %xmm2
382; X86-SSE2-NEXT:    packssdw %xmm2, %xmm0
383; X86-SSE2-NEXT:    psllw $15, %xmm0
384; X86-SSE2-NEXT:    packsswb %xmm0, %xmm0
385; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
386; X86-SSE2-NEXT:    cmpb $-1, %al
387; X86-SSE2-NEXT:    sete %al
388; X86-SSE2-NEXT:    movl %ebp, %esp
389; X86-SSE2-NEXT:    popl %ebp
390; X86-SSE2-NEXT:    retl
391;
392; X64-SSE2-LABEL: trunc_v8i64_v8i1:
393; X64-SSE2:       # %bb.0:
394; X64-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
395; X64-SSE2-NEXT:    pslld $16, %xmm2
396; X64-SSE2-NEXT:    psrad $16, %xmm2
397; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
398; X64-SSE2-NEXT:    pslld $16, %xmm0
399; X64-SSE2-NEXT:    psrad $16, %xmm0
400; X64-SSE2-NEXT:    packssdw %xmm2, %xmm0
401; X64-SSE2-NEXT:    psllw $15, %xmm0
402; X64-SSE2-NEXT:    packsswb %xmm0, %xmm0
403; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
404; X64-SSE2-NEXT:    cmpb $-1, %al
405; X64-SSE2-NEXT:    sete %al
406; X64-SSE2-NEXT:    retq
407;
408; SSE41-LABEL: trunc_v8i64_v8i1:
409; SSE41:       # %bb.0:
410; SSE41-NEXT:    pand %xmm3, %xmm1
411; SSE41-NEXT:    pand %xmm2, %xmm0
412; SSE41-NEXT:    pand %xmm1, %xmm0
413; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
414; SSE41-NEXT:    setb %al
415; SSE41-NEXT:    retq
416;
417; AVX1-LABEL: trunc_v8i64_v8i1:
418; AVX1:       # %bb.0:
419; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
420; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
421; AVX1-NEXT:    setb %al
422; AVX1-NEXT:    vzeroupper
423; AVX1-NEXT:    retq
424;
425; AVX2-LABEL: trunc_v8i64_v8i1:
426; AVX2:       # %bb.0:
427; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
428; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
429; AVX2-NEXT:    vptest %ymm1, %ymm0
430; AVX2-NEXT:    setb %al
431; AVX2-NEXT:    vzeroupper
432; AVX2-NEXT:    retq
433;
434; AVX512-LABEL: trunc_v8i64_v8i1:
435; AVX512:       # %bb.0:
436; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
437; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
438; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
439; AVX512-NEXT:    kortestw %k0, %k0
440; AVX512-NEXT:    sete %al
441; AVX512-NEXT:    vzeroupper
442; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
443  %a = trunc <8 x i64> %0 to <8 x i1>
444  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
445  ret i1 %b
446}
447
447
; trunc_v16i32_v16i1 - 512-bit variant: truncates the sixteen i32 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <16 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * 32-bit SSE2 run - ebp frame with aligned stack; one pand operand is read
;     from 8(%ebp); then pslld $31 + movmskps and the mask xor-ed with 15.
;   * 64-bit SSE2 run - three register pand instructions, then the same
;     pslld/movmskps/xorl tail.
;   * SSE4.1 run - three pand instructions, then ptest against a constant-pool
;     operand.
;   * AVX1/AVX2 runs - the two ymm halves are and-ed, then one ymm vptest.
;   * AVX512 runs - vptestnmd with a {1to16} broadcast memory operand into k0,
;     kortestw, sete.
448define i1 @trunc_v16i32_v16i1(<16 x i32>) nounwind {
449; X86-SSE2-LABEL: trunc_v16i32_v16i1:
450; X86-SSE2:       # %bb.0:
451; X86-SSE2-NEXT:    pushl %ebp
452; X86-SSE2-NEXT:    movl %esp, %ebp
453; X86-SSE2-NEXT:    andl $-16, %esp
454; X86-SSE2-NEXT:    subl $16, %esp
455; X86-SSE2-NEXT:    pand %xmm2, %xmm0
456; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
457; X86-SSE2-NEXT:    pand %xmm0, %xmm1
458; X86-SSE2-NEXT:    pslld $31, %xmm1
459; X86-SSE2-NEXT:    movmskps %xmm1, %eax
460; X86-SSE2-NEXT:    xorl $15, %eax
461; X86-SSE2-NEXT:    sete %al
462; X86-SSE2-NEXT:    movl %ebp, %esp
463; X86-SSE2-NEXT:    popl %ebp
464; X86-SSE2-NEXT:    retl
465;
466; X64-SSE2-LABEL: trunc_v16i32_v16i1:
467; X64-SSE2:       # %bb.0:
468; X64-SSE2-NEXT:    pand %xmm3, %xmm1
469; X64-SSE2-NEXT:    pand %xmm2, %xmm0
470; X64-SSE2-NEXT:    pand %xmm1, %xmm0
471; X64-SSE2-NEXT:    pslld $31, %xmm0
472; X64-SSE2-NEXT:    movmskps %xmm0, %eax
473; X64-SSE2-NEXT:    xorl $15, %eax
474; X64-SSE2-NEXT:    sete %al
475; X64-SSE2-NEXT:    retq
476;
477; SSE41-LABEL: trunc_v16i32_v16i1:
478; SSE41:       # %bb.0:
479; SSE41-NEXT:    pand %xmm3, %xmm1
480; SSE41-NEXT:    pand %xmm2, %xmm0
481; SSE41-NEXT:    pand %xmm1, %xmm0
482; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
483; SSE41-NEXT:    setb %al
484; SSE41-NEXT:    retq
485;
486; AVX1-LABEL: trunc_v16i32_v16i1:
487; AVX1:       # %bb.0:
488; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
489; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
490; AVX1-NEXT:    setb %al
491; AVX1-NEXT:    vzeroupper
492; AVX1-NEXT:    retq
493;
494; AVX2-LABEL: trunc_v16i32_v16i1:
495; AVX2:       # %bb.0:
496; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
497; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
498; AVX2-NEXT:    vptest %ymm1, %ymm0
499; AVX2-NEXT:    setb %al
500; AVX2-NEXT:    vzeroupper
501; AVX2-NEXT:    retq
502;
503; AVX512-LABEL: trunc_v16i32_v16i1:
504; AVX512:       # %bb.0:
505; AVX512-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
506; AVX512-NEXT:    kortestw %k0, %k0
507; AVX512-NEXT:    sete %al
508; AVX512-NEXT:    vzeroupper
509; AVX512-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
510  %a = trunc <16 x i32> %0 to <16 x i1>
511  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
512  ret i1 %b
513}
514
514
; trunc_v32i16_v32i1 - 512-bit variant: truncates the thirty-two i16 lanes of
; the argument to i1 and returns the AND-reduction of the resulting <32 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * 32-bit SSE2 run - ebp frame with aligned stack; one pand operand is read
;     from 8(%ebp); then psllw $7 + pmovmskb, notl and testl with 0x5555.
;   * 64-bit SSE2 run - three register pand instructions, then the same
;     psllw/pmovmskb/notl/testl tail.
;   * SSE4.1 run - three pand instructions, then ptest against a constant-pool
;     operand.
;   * AVX1/AVX2 runs - the two ymm halves are and-ed, then one ymm vptest.
;   * AVX512F run - the all-ones-per-lane constant is built with vpbroadcastd;
;     the AVX512BW and AVX512VL runs use vpbroadcastw instead. All three then
;     use vpandq, vpcmpneqd into k0, kortestw and sete.
515define i1 @trunc_v32i16_v32i1(<32 x i16>) nounwind {
516; X86-SSE2-LABEL: trunc_v32i16_v32i1:
517; X86-SSE2:       # %bb.0:
518; X86-SSE2-NEXT:    pushl %ebp
519; X86-SSE2-NEXT:    movl %esp, %ebp
520; X86-SSE2-NEXT:    andl $-16, %esp
521; X86-SSE2-NEXT:    subl $16, %esp
522; X86-SSE2-NEXT:    pand %xmm2, %xmm0
523; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
524; X86-SSE2-NEXT:    pand %xmm0, %xmm1
525; X86-SSE2-NEXT:    psllw $7, %xmm1
526; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
527; X86-SSE2-NEXT:    notl %eax
528; X86-SSE2-NEXT:    testl $21845, %eax # imm = 0x5555
529; X86-SSE2-NEXT:    sete %al
530; X86-SSE2-NEXT:    movl %ebp, %esp
531; X86-SSE2-NEXT:    popl %ebp
532; X86-SSE2-NEXT:    retl
533;
534; X64-SSE2-LABEL: trunc_v32i16_v32i1:
535; X64-SSE2:       # %bb.0:
536; X64-SSE2-NEXT:    pand %xmm3, %xmm1
537; X64-SSE2-NEXT:    pand %xmm2, %xmm0
538; X64-SSE2-NEXT:    pand %xmm1, %xmm0
539; X64-SSE2-NEXT:    psllw $7, %xmm0
540; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
541; X64-SSE2-NEXT:    notl %eax
542; X64-SSE2-NEXT:    testl $21845, %eax # imm = 0x5555
543; X64-SSE2-NEXT:    sete %al
544; X64-SSE2-NEXT:    retq
545;
546; SSE41-LABEL: trunc_v32i16_v32i1:
547; SSE41:       # %bb.0:
548; SSE41-NEXT:    pand %xmm3, %xmm1
549; SSE41-NEXT:    pand %xmm2, %xmm0
550; SSE41-NEXT:    pand %xmm1, %xmm0
551; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
552; SSE41-NEXT:    setb %al
553; SSE41-NEXT:    retq
554;
555; AVX1-LABEL: trunc_v32i16_v32i1:
556; AVX1:       # %bb.0:
557; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
558; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
559; AVX1-NEXT:    setb %al
560; AVX1-NEXT:    vzeroupper
561; AVX1-NEXT:    retq
562;
563; AVX2-LABEL: trunc_v32i16_v32i1:
564; AVX2:       # %bb.0:
565; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
566; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
567; AVX2-NEXT:    vptest %ymm1, %ymm0
568; AVX2-NEXT:    setb %al
569; AVX2-NEXT:    vzeroupper
570; AVX2-NEXT:    retq
571;
572; AVX512F-LABEL: trunc_v32i16_v32i1:
573; AVX512F:       # %bb.0:
574; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
575; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
576; AVX512F-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
577; AVX512F-NEXT:    kortestw %k0, %k0
578; AVX512F-NEXT:    sete %al
579; AVX512F-NEXT:    vzeroupper
580; AVX512F-NEXT:    retq
581;
582; AVX512BW-LABEL: trunc_v32i16_v32i1:
583; AVX512BW:       # %bb.0:
584; AVX512BW-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
585; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
586; AVX512BW-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
587; AVX512BW-NEXT:    kortestw %k0, %k0
588; AVX512BW-NEXT:    sete %al
589; AVX512BW-NEXT:    vzeroupper
590; AVX512BW-NEXT:    retq
591;
592; AVX512VL-LABEL: trunc_v32i16_v32i1:
593; AVX512VL:       # %bb.0:
594; AVX512VL-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
595; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
596; AVX512VL-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
597; AVX512VL-NEXT:    kortestw %k0, %k0
598; AVX512VL-NEXT:    sete %al
599; AVX512VL-NEXT:    vzeroupper
600; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
601  %a = trunc <32 x i16> %0 to <32 x i1>
602  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
603  ret i1 %b
604}
605
605
; trunc_v64i8_v64i1 - 512-bit variant: truncates the sixty-four i8 lanes of the
; argument to i1 and returns the AND-reduction of the resulting <64 x i1>.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * 32-bit SSE2 run - ebp frame with aligned stack; one pand operand is read
;     from 8(%ebp); then psllw $7 + pmovmskb and the mask xor-ed with 0xFFFF.
;   * 64-bit SSE2 run - three register pand instructions, then the same
;     psllw/pmovmskb/xorl tail.
;   * SSE4.1 run - three pand instructions, then ptest against a constant-pool
;     operand.
;   * AVX1/AVX2 runs - the two ymm halves are and-ed, then one ymm vptest.
;   * AVX512F run - the all-ones-per-lane constant is built with vpbroadcastd;
;     the AVX512BW and AVX512VL runs use vpbroadcastb instead. All three then
;     use vpandq, vpcmpneqd into k0, kortestw and sete.
606define i1 @trunc_v64i8_v64i1(<64 x i8>) nounwind {
607; X86-SSE2-LABEL: trunc_v64i8_v64i1:
608; X86-SSE2:       # %bb.0:
609; X86-SSE2-NEXT:    pushl %ebp
610; X86-SSE2-NEXT:    movl %esp, %ebp
611; X86-SSE2-NEXT:    andl $-16, %esp
612; X86-SSE2-NEXT:    subl $16, %esp
613; X86-SSE2-NEXT:    pand %xmm2, %xmm0
614; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
615; X86-SSE2-NEXT:    pand %xmm0, %xmm1
616; X86-SSE2-NEXT:    psllw $7, %xmm1
617; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
618; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
619; X86-SSE2-NEXT:    sete %al
620; X86-SSE2-NEXT:    movl %ebp, %esp
621; X86-SSE2-NEXT:    popl %ebp
622; X86-SSE2-NEXT:    retl
623;
624; X64-SSE2-LABEL: trunc_v64i8_v64i1:
625; X64-SSE2:       # %bb.0:
626; X64-SSE2-NEXT:    pand %xmm3, %xmm1
627; X64-SSE2-NEXT:    pand %xmm2, %xmm0
628; X64-SSE2-NEXT:    pand %xmm1, %xmm0
629; X64-SSE2-NEXT:    psllw $7, %xmm0
630; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
631; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
632; X64-SSE2-NEXT:    sete %al
633; X64-SSE2-NEXT:    retq
634;
635; SSE41-LABEL: trunc_v64i8_v64i1:
636; SSE41:       # %bb.0:
637; SSE41-NEXT:    pand %xmm3, %xmm1
638; SSE41-NEXT:    pand %xmm2, %xmm0
639; SSE41-NEXT:    pand %xmm1, %xmm0
640; SSE41-NEXT:    ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
641; SSE41-NEXT:    setb %al
642; SSE41-NEXT:    retq
643;
644; AVX1-LABEL: trunc_v64i8_v64i1:
645; AVX1:       # %bb.0:
646; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
647; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
648; AVX1-NEXT:    setb %al
649; AVX1-NEXT:    vzeroupper
650; AVX1-NEXT:    retq
651;
652; AVX2-LABEL: trunc_v64i8_v64i1:
653; AVX2:       # %bb.0:
654; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
655; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
656; AVX2-NEXT:    vptest %ymm1, %ymm0
657; AVX2-NEXT:    setb %al
658; AVX2-NEXT:    vzeroupper
659; AVX2-NEXT:    retq
660;
661; AVX512F-LABEL: trunc_v64i8_v64i1:
662; AVX512F:       # %bb.0:
663; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
664; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
665; AVX512F-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
666; AVX512F-NEXT:    kortestw %k0, %k0
667; AVX512F-NEXT:    sete %al
668; AVX512F-NEXT:    vzeroupper
669; AVX512F-NEXT:    retq
670;
671; AVX512BW-LABEL: trunc_v64i8_v64i1:
672; AVX512BW:       # %bb.0:
673; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
674; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
675; AVX512BW-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
676; AVX512BW-NEXT:    kortestw %k0, %k0
677; AVX512BW-NEXT:    sete %al
678; AVX512BW-NEXT:    vzeroupper
679; AVX512BW-NEXT:    retq
680;
681; AVX512VL-LABEL: trunc_v64i8_v64i1:
682; AVX512VL:       # %bb.0:
683; AVX512VL-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
684; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
685; AVX512VL-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
686; AVX512VL-NEXT:    kortestw %k0, %k0
687; AVX512VL-NEXT:    sete %al
688; AVX512VL-NEXT:    vzeroupper
689; AVX512VL-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
690  %a = trunc <64 x i8> %0 to <64 x i1>
691  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
692  ret i1 %b
693}
694
694
695;
696; Comparison With Zero
697;
698
; icmp0_v2i64_v2i1 - compares both i64 lanes of the argument with zero
; (icmp eq) and returns the AND-reduction of the <2 x i1> result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - pcmpeqd against a pxor-zeroed register, movmskps, then the
;     mask xor-ed with 15 and tested by sete.
;   * SSE4.1 and all AVX runs - (v)ptest of xmm0 with itself followed by sete.
699define i1 @icmp0_v2i64_v2i1(<2 x i64>) nounwind {
700; SSE2-LABEL: icmp0_v2i64_v2i1:
701; SSE2:       # %bb.0:
702; SSE2-NEXT:    pxor %xmm1, %xmm1
703; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
704; SSE2-NEXT:    movmskps %xmm1, %eax
705; SSE2-NEXT:    xorl $15, %eax
706; SSE2-NEXT:    sete %al
707; SSE2-NEXT:    ret{{[l|q]}}
708;
709; SSE41-LABEL: icmp0_v2i64_v2i1:
710; SSE41:       # %bb.0:
711; SSE41-NEXT:    ptest %xmm0, %xmm0
712; SSE41-NEXT:    sete %al
713; SSE41-NEXT:    retq
714;
715; AVX-LABEL: icmp0_v2i64_v2i1:
716; AVX:       # %bb.0:
717; AVX-NEXT:    vptest %xmm0, %xmm0
718; AVX-NEXT:    sete %al
719; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
720  %a = icmp eq <2 x i64> %0, zeroinitializer
721  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
722  ret i1 %b
723}
724
724
; icmp0_v4i32_v4i1 - compares the four i32 lanes of the argument with zero
; (icmp eq) and returns the AND-reduction of the <4 x i1> result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - pcmpeqd against a pxor-zeroed register, movmskps, then the
;     mask xor-ed with 15 and tested by sete.
;   * SSE4.1 and all AVX runs - (v)ptest of xmm0 with itself followed by sete.
725define i1 @icmp0_v4i32_v4i1(<4 x i32>) nounwind {
726; SSE2-LABEL: icmp0_v4i32_v4i1:
727; SSE2:       # %bb.0:
728; SSE2-NEXT:    pxor %xmm1, %xmm1
729; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
730; SSE2-NEXT:    movmskps %xmm1, %eax
731; SSE2-NEXT:    xorl $15, %eax
732; SSE2-NEXT:    sete %al
733; SSE2-NEXT:    ret{{[l|q]}}
734;
735; SSE41-LABEL: icmp0_v4i32_v4i1:
736; SSE41:       # %bb.0:
737; SSE41-NEXT:    ptest %xmm0, %xmm0
738; SSE41-NEXT:    sete %al
739; SSE41-NEXT:    retq
740;
741; AVX-LABEL: icmp0_v4i32_v4i1:
742; AVX:       # %bb.0:
743; AVX-NEXT:    vptest %xmm0, %xmm0
744; AVX-NEXT:    sete %al
745; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
746  %a = icmp eq <4 x i32> %0, zeroinitializer
747  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
748  ret i1 %b
749}
750
750
; icmp0_v8i16_v8i1 - compares the eight i16 lanes of the argument with zero
; (icmp eq) and returns the AND-reduction of the <8 x i1> result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - byte-wise pcmpeqb against a pxor-zeroed register, pmovmskb,
;     then the mask xor-ed with 0xFFFF and tested by sete.
;   * SSE4.1 and all AVX runs - (v)ptest of xmm0 with itself followed by sete.
751define i1 @icmp0_v8i16_v8i1(<8 x i16>) nounwind {
752; SSE2-LABEL: icmp0_v8i16_v8i1:
753; SSE2:       # %bb.0:
754; SSE2-NEXT:    pxor %xmm1, %xmm1
755; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
756; SSE2-NEXT:    pmovmskb %xmm1, %eax
757; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
758; SSE2-NEXT:    sete %al
759; SSE2-NEXT:    ret{{[l|q]}}
760;
761; SSE41-LABEL: icmp0_v8i16_v8i1:
762; SSE41:       # %bb.0:
763; SSE41-NEXT:    ptest %xmm0, %xmm0
764; SSE41-NEXT:    sete %al
765; SSE41-NEXT:    retq
766;
767; AVX-LABEL: icmp0_v8i16_v8i1:
768; AVX:       # %bb.0:
769; AVX-NEXT:    vptest %xmm0, %xmm0
770; AVX-NEXT:    sete %al
771; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
772  %a = icmp eq <8 x i16> %0, zeroinitializer
773  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
774  ret i1 %b
775}
776
776
; icmp0_v16i8_v16i1 - compares the sixteen i8 lanes of the argument with zero
; (icmp eq) and returns the AND-reduction of the <16 x i1> result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - pcmpeqb against a pxor-zeroed register, pmovmskb, then the
;     mask xor-ed with 0xFFFF and tested by sete.
;   * SSE4.1 and all AVX runs - (v)ptest of xmm0 with itself followed by sete.
777define i1 @icmp0_v16i8_v16i1(<16 x i8>) nounwind {
778; SSE2-LABEL: icmp0_v16i8_v16i1:
779; SSE2:       # %bb.0:
780; SSE2-NEXT:    pxor %xmm1, %xmm1
781; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
782; SSE2-NEXT:    pmovmskb %xmm1, %eax
783; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
784; SSE2-NEXT:    sete %al
785; SSE2-NEXT:    ret{{[l|q]}}
786;
787; SSE41-LABEL: icmp0_v16i8_v16i1:
788; SSE41:       # %bb.0:
789; SSE41-NEXT:    ptest %xmm0, %xmm0
790; SSE41-NEXT:    sete %al
791; SSE41-NEXT:    retq
792;
793; AVX-LABEL: icmp0_v16i8_v16i1:
794; AVX:       # %bb.0:
795; AVX-NEXT:    vptest %xmm0, %xmm0
796; AVX-NEXT:    sete %al
797; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
798  %a = icmp eq <16 x i8> %0, zeroinitializer
799  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
800  ret i1 %b
801}
802
802
; icmp0_v4i64_v4i1 - 256-bit variant: compares the four i64 lanes of the
; argument with zero (icmp eq) and returns the AND-reduction of the <4 x i1>
; result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - the two halves are or-ed with por, then pcmpeqd against
;     zero, movmskps and the mask xor-ed with 15.
;   * SSE4.1 run - por of the halves, then ptest of the result with itself.
;   * All AVX runs - one ymm vptest of the argument with itself, sete, then
;     vzeroupper before returning.
803define i1 @icmp0_v4i64_v4i1(<4 x i64>) nounwind {
804; SSE2-LABEL: icmp0_v4i64_v4i1:
805; SSE2:       # %bb.0:
806; SSE2-NEXT:    por %xmm1, %xmm0
807; SSE2-NEXT:    pxor %xmm1, %xmm1
808; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
809; SSE2-NEXT:    movmskps %xmm1, %eax
810; SSE2-NEXT:    xorl $15, %eax
811; SSE2-NEXT:    sete %al
812; SSE2-NEXT:    ret{{[l|q]}}
813;
814; SSE41-LABEL: icmp0_v4i64_v4i1:
815; SSE41:       # %bb.0:
816; SSE41-NEXT:    por %xmm1, %xmm0
817; SSE41-NEXT:    ptest %xmm0, %xmm0
818; SSE41-NEXT:    sete %al
819; SSE41-NEXT:    retq
820;
821; AVX-LABEL: icmp0_v4i64_v4i1:
822; AVX:       # %bb.0:
823; AVX-NEXT:    vptest %ymm0, %ymm0
824; AVX-NEXT:    sete %al
825; AVX-NEXT:    vzeroupper
826; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
827  %a = icmp eq <4 x i64> %0, zeroinitializer
828  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
829  ret i1 %b
830}
831
831
; icmp0_v8i32_v8i1 - 256-bit variant: compares the eight i32 lanes of the
; argument with zero (icmp eq) and returns the AND-reduction of the <8 x i1>
; result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - the two halves are or-ed with por, then pcmpeqd against
;     zero, movmskps and the mask xor-ed with 15.
;   * SSE4.1 run - por of the halves, then ptest of the result with itself.
;   * All AVX runs - one ymm vptest of the argument with itself, sete, then
;     vzeroupper before returning.
832define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
833; SSE2-LABEL: icmp0_v8i32_v8i1:
834; SSE2:       # %bb.0:
835; SSE2-NEXT:    por %xmm1, %xmm0
836; SSE2-NEXT:    pxor %xmm1, %xmm1
837; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
838; SSE2-NEXT:    movmskps %xmm1, %eax
839; SSE2-NEXT:    xorl $15, %eax
840; SSE2-NEXT:    sete %al
841; SSE2-NEXT:    ret{{[l|q]}}
842;
843; SSE41-LABEL: icmp0_v8i32_v8i1:
844; SSE41:       # %bb.0:
845; SSE41-NEXT:    por %xmm1, %xmm0
846; SSE41-NEXT:    ptest %xmm0, %xmm0
847; SSE41-NEXT:    sete %al
848; SSE41-NEXT:    retq
849;
850; AVX-LABEL: icmp0_v8i32_v8i1:
851; AVX:       # %bb.0:
852; AVX-NEXT:    vptest %ymm0, %ymm0
853; AVX-NEXT:    sete %al
854; AVX-NEXT:    vzeroupper
855; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
856  %a = icmp eq <8 x i32> %0, zeroinitializer
857  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
858  ret i1 %b
859}
860
860
; icmp0_v16i16_v16i1 - 256-bit variant: compares the sixteen i16 lanes of the
; argument with zero (icmp eq) and returns the AND-reduction of the <16 x i1>
; result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - the two halves are or-ed with por, then pcmpeqb against
;     zero, pmovmskb and the mask xor-ed with 0xFFFF.
;   * SSE4.1 run - por of the halves, then ptest of the result with itself.
;   * All AVX runs - one ymm vptest of the argument with itself, sete, then
;     vzeroupper before returning.
861define i1 @icmp0_v16i16_v16i1(<16 x i16>) nounwind {
862; SSE2-LABEL: icmp0_v16i16_v16i1:
863; SSE2:       # %bb.0:
864; SSE2-NEXT:    por %xmm1, %xmm0
865; SSE2-NEXT:    pxor %xmm1, %xmm1
866; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
867; SSE2-NEXT:    pmovmskb %xmm1, %eax
868; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
869; SSE2-NEXT:    sete %al
870; SSE2-NEXT:    ret{{[l|q]}}
871;
872; SSE41-LABEL: icmp0_v16i16_v16i1:
873; SSE41:       # %bb.0:
874; SSE41-NEXT:    por %xmm1, %xmm0
875; SSE41-NEXT:    ptest %xmm0, %xmm0
876; SSE41-NEXT:    sete %al
877; SSE41-NEXT:    retq
878;
879; AVX-LABEL: icmp0_v16i16_v16i1:
880; AVX:       # %bb.0:
881; AVX-NEXT:    vptest %ymm0, %ymm0
882; AVX-NEXT:    sete %al
883; AVX-NEXT:    vzeroupper
884; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
885  %a = icmp eq <16 x i16> %0, zeroinitializer
886  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
887  ret i1 %b
888}
889
889
; icmp0_v32i8_v32i1 - 256-bit variant: compares the thirty-two i8 lanes of the
; argument with zero (icmp eq) and returns the AND-reduction of the <32 x i1>
; result.
; The assertions below are autogenerated; regenerate rather than hand-edit.
; Expected lowering as recorded by those assertions:
;   * SSE2 runs - the two halves are or-ed with por, then pcmpeqb against
;     zero, pmovmskb and the mask xor-ed with 0xFFFF.
;   * SSE4.1 run - por of the halves, then ptest of the result with itself.
;   * All AVX runs - one ymm vptest of the argument with itself, sete, then
;     vzeroupper before returning.
890define i1 @icmp0_v32i8_v32i1(<32 x i8>) nounwind {
891; SSE2-LABEL: icmp0_v32i8_v32i1:
892; SSE2:       # %bb.0:
893; SSE2-NEXT:    por %xmm1, %xmm0
894; SSE2-NEXT:    pxor %xmm1, %xmm1
895; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
896; SSE2-NEXT:    pmovmskb %xmm1, %eax
897; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
898; SSE2-NEXT:    sete %al
899; SSE2-NEXT:    ret{{[l|q]}}
900;
901; SSE41-LABEL: icmp0_v32i8_v32i1:
902; SSE41:       # %bb.0:
903; SSE41-NEXT:    por %xmm1, %xmm0
904; SSE41-NEXT:    ptest %xmm0, %xmm0
905; SSE41-NEXT:    sete %al
906; SSE41-NEXT:    retq
907;
908; AVX-LABEL: icmp0_v32i8_v32i1:
909; AVX:       # %bb.0:
910; AVX-NEXT:    vptest %ymm0, %ymm0
911; AVX-NEXT:    sete %al
912; AVX-NEXT:    vzeroupper
913; AVX-NEXT:    retq
  ; IR under test - %0 is the unnamed vector argument.
914  %a = icmp eq <32 x i8> %0, zeroinitializer
915  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
916  ret i1 %b
917}
918
918
; All-lanes-zero test for <8 x i64>: compares the argument with zeroinitializer
; and AND-reduces the 8 i1 results, so the function returns true only when
; every lane is zero. Where the argument is split over several vector
; registers, the expected code ORs the pieces together before one zero test;
; in the i686 run one piece is read from the stack (8(%ebp)), which is why that
; variant sets up an aligned frame.
919define i1 @icmp0_v8i64_v8i1(<8 x i64>) nounwind {
920; X86-SSE2-LABEL: icmp0_v8i64_v8i1:
921; X86-SSE2:       # %bb.0:
922; X86-SSE2-NEXT:    pushl %ebp
923; X86-SSE2-NEXT:    movl %esp, %ebp
924; X86-SSE2-NEXT:    andl $-16, %esp
925; X86-SSE2-NEXT:    subl $16, %esp
926; X86-SSE2-NEXT:    por %xmm2, %xmm0
927; X86-SSE2-NEXT:    por 8(%ebp), %xmm1
928; X86-SSE2-NEXT:    por %xmm0, %xmm1
929; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
930; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
931; X86-SSE2-NEXT:    movmskps %xmm0, %eax
932; X86-SSE2-NEXT:    xorl $15, %eax
933; X86-SSE2-NEXT:    sete %al
934; X86-SSE2-NEXT:    movl %ebp, %esp
935; X86-SSE2-NEXT:    popl %ebp
936; X86-SSE2-NEXT:    retl
937;
938; X64-SSE2-LABEL: icmp0_v8i64_v8i1:
939; X64-SSE2:       # %bb.0:
940; X64-SSE2-NEXT:    por %xmm3, %xmm1
941; X64-SSE2-NEXT:    por %xmm2, %xmm0
942; X64-SSE2-NEXT:    por %xmm1, %xmm0
943; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
944; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
945; X64-SSE2-NEXT:    movmskps %xmm1, %eax
946; X64-SSE2-NEXT:    xorl $15, %eax
947; X64-SSE2-NEXT:    sete %al
948; X64-SSE2-NEXT:    retq
949;
950; SSE41-LABEL: icmp0_v8i64_v8i1:
951; SSE41:       # %bb.0:
952; SSE41-NEXT:    por %xmm3, %xmm1
953; SSE41-NEXT:    por %xmm2, %xmm0
954; SSE41-NEXT:    por %xmm1, %xmm0
955; SSE41-NEXT:    ptest %xmm0, %xmm0
956; SSE41-NEXT:    sete %al
957; SSE41-NEXT:    retq
958;
959; AVX1-LABEL: icmp0_v8i64_v8i1:
960; AVX1:       # %bb.0:
961; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
962; AVX1-NEXT:    vptest %ymm0, %ymm0
963; AVX1-NEXT:    sete %al
964; AVX1-NEXT:    vzeroupper
965; AVX1-NEXT:    retq
966;
967; AVX2-LABEL: icmp0_v8i64_v8i1:
968; AVX2:       # %bb.0:
969; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
970; AVX2-NEXT:    vptest %ymm0, %ymm0
971; AVX2-NEXT:    sete %al
972; AVX2-NEXT:    vzeroupper
973; AVX2-NEXT:    retq
974;
975; AVX512-LABEL: icmp0_v8i64_v8i1:
976; AVX512:       # %bb.0:
977; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
978; AVX512-NEXT:    kortestw %k0, %k0
979; AVX512-NEXT:    sete %al
980; AVX512-NEXT:    vzeroupper
981; AVX512-NEXT:    retq
982  %a = icmp eq <8 x i64> %0, zeroinitializer
983  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
984  ret i1 %b
985}
986
; All-lanes-zero test for <16 x i32>: compares the argument with
; zeroinitializer and AND-reduces the 16 i1 results, so the function returns
; true only when every lane is zero. Where the argument is split over several
; vector registers, the expected code ORs the pieces together before one zero
; test; in the i686 run one piece is read from the stack (8(%ebp)).
987define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
988; X86-SSE2-LABEL: icmp0_v16i32_v16i1:
989; X86-SSE2:       # %bb.0:
990; X86-SSE2-NEXT:    pushl %ebp
991; X86-SSE2-NEXT:    movl %esp, %ebp
992; X86-SSE2-NEXT:    andl $-16, %esp
993; X86-SSE2-NEXT:    subl $16, %esp
994; X86-SSE2-NEXT:    por %xmm2, %xmm0
995; X86-SSE2-NEXT:    por 8(%ebp), %xmm1
996; X86-SSE2-NEXT:    por %xmm0, %xmm1
997; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
998; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
999; X86-SSE2-NEXT:    movmskps %xmm0, %eax
1000; X86-SSE2-NEXT:    xorl $15, %eax
1001; X86-SSE2-NEXT:    sete %al
1002; X86-SSE2-NEXT:    movl %ebp, %esp
1003; X86-SSE2-NEXT:    popl %ebp
1004; X86-SSE2-NEXT:    retl
1005;
1006; X64-SSE2-LABEL: icmp0_v16i32_v16i1:
1007; X64-SSE2:       # %bb.0:
1008; X64-SSE2-NEXT:    por %xmm3, %xmm1
1009; X64-SSE2-NEXT:    por %xmm2, %xmm0
1010; X64-SSE2-NEXT:    por %xmm1, %xmm0
1011; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
1012; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1013; X64-SSE2-NEXT:    movmskps %xmm1, %eax
1014; X64-SSE2-NEXT:    xorl $15, %eax
1015; X64-SSE2-NEXT:    sete %al
1016; X64-SSE2-NEXT:    retq
1017;
1018; SSE41-LABEL: icmp0_v16i32_v16i1:
1019; SSE41:       # %bb.0:
1020; SSE41-NEXT:    por %xmm3, %xmm1
1021; SSE41-NEXT:    por %xmm2, %xmm0
1022; SSE41-NEXT:    por %xmm1, %xmm0
1023; SSE41-NEXT:    ptest %xmm0, %xmm0
1024; SSE41-NEXT:    sete %al
1025; SSE41-NEXT:    retq
1026;
1027; AVX1-LABEL: icmp0_v16i32_v16i1:
1028; AVX1:       # %bb.0:
1029; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
1030; AVX1-NEXT:    vptest %ymm0, %ymm0
1031; AVX1-NEXT:    sete %al
1032; AVX1-NEXT:    vzeroupper
1033; AVX1-NEXT:    retq
1034;
1035; AVX2-LABEL: icmp0_v16i32_v16i1:
1036; AVX2:       # %bb.0:
1037; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1038; AVX2-NEXT:    vptest %ymm0, %ymm0
1039; AVX2-NEXT:    sete %al
1040; AVX2-NEXT:    vzeroupper
1041; AVX2-NEXT:    retq
1042;
1043; AVX512-LABEL: icmp0_v16i32_v16i1:
1044; AVX512:       # %bb.0:
1045; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
1046; AVX512-NEXT:    kortestw %k0, %k0
1047; AVX512-NEXT:    sete %al
1048; AVX512-NEXT:    vzeroupper
1049; AVX512-NEXT:    retq
1050  %a = icmp eq <16 x i32> %0, zeroinitializer
1051  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
1052  ret i1 %b
1053}
1054
; All-lanes-zero test for <32 x i16>: compares the argument with
; zeroinitializer and AND-reduces the 32 i1 results, so the function returns
; true only when every lane is zero. Where the argument is split over several
; vector registers, the expected code ORs the pieces together before one zero
; test; in the i686 run one piece is read from the stack (8(%ebp)).
1055define i1 @icmp0_v32i16_v32i1(<32 x i16>) nounwind {
1056; X86-SSE2-LABEL: icmp0_v32i16_v32i1:
1057; X86-SSE2:       # %bb.0:
1058; X86-SSE2-NEXT:    pushl %ebp
1059; X86-SSE2-NEXT:    movl %esp, %ebp
1060; X86-SSE2-NEXT:    andl $-16, %esp
1061; X86-SSE2-NEXT:    subl $16, %esp
1062; X86-SSE2-NEXT:    por %xmm2, %xmm0
1063; X86-SSE2-NEXT:    por 8(%ebp), %xmm1
1064; X86-SSE2-NEXT:    por %xmm0, %xmm1
1065; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
1066; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
1067; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1068; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1069; X86-SSE2-NEXT:    sete %al
1070; X86-SSE2-NEXT:    movl %ebp, %esp
1071; X86-SSE2-NEXT:    popl %ebp
1072; X86-SSE2-NEXT:    retl
1073;
1074; X64-SSE2-LABEL: icmp0_v32i16_v32i1:
1075; X64-SSE2:       # %bb.0:
1076; X64-SSE2-NEXT:    por %xmm3, %xmm1
1077; X64-SSE2-NEXT:    por %xmm2, %xmm0
1078; X64-SSE2-NEXT:    por %xmm1, %xmm0
1079; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
1080; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1081; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
1082; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1083; X64-SSE2-NEXT:    sete %al
1084; X64-SSE2-NEXT:    retq
1085;
1086; SSE41-LABEL: icmp0_v32i16_v32i1:
1087; SSE41:       # %bb.0:
1088; SSE41-NEXT:    por %xmm3, %xmm1
1089; SSE41-NEXT:    por %xmm2, %xmm0
1090; SSE41-NEXT:    por %xmm1, %xmm0
1091; SSE41-NEXT:    ptest %xmm0, %xmm0
1092; SSE41-NEXT:    sete %al
1093; SSE41-NEXT:    retq
1094;
1095; AVX1-LABEL: icmp0_v32i16_v32i1:
1096; AVX1:       # %bb.0:
1097; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
1098; AVX1-NEXT:    vptest %ymm0, %ymm0
1099; AVX1-NEXT:    sete %al
1100; AVX1-NEXT:    vzeroupper
1101; AVX1-NEXT:    retq
1102;
1103; AVX2-LABEL: icmp0_v32i16_v32i1:
1104; AVX2:       # %bb.0:
1105; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1106; AVX2-NEXT:    vptest %ymm0, %ymm0
1107; AVX2-NEXT:    sete %al
1108; AVX2-NEXT:    vzeroupper
1109; AVX2-NEXT:    retq
1110;
1111; AVX512-LABEL: icmp0_v32i16_v32i1:
1112; AVX512:       # %bb.0:
1113; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
1114; AVX512-NEXT:    kortestw %k0, %k0
1115; AVX512-NEXT:    sete %al
1116; AVX512-NEXT:    vzeroupper
1117; AVX512-NEXT:    retq
1118  %a = icmp eq <32 x i16> %0, zeroinitializer
1119  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
1120  ret i1 %b
1121}
1122
; All-lanes-zero test for <64 x i8>: compares the argument with zeroinitializer
; and AND-reduces the 64 i1 results, so the function returns true only when
; every byte lane is zero. Where the argument is split over several vector
; registers, the expected code ORs the pieces together before one zero test;
; in the i686 run one piece is read from the stack (8(%ebp)).
1123define i1 @icmp0_v64i8_v64i1(<64 x i8>) nounwind {
1124; X86-SSE2-LABEL: icmp0_v64i8_v64i1:
1125; X86-SSE2:       # %bb.0:
1126; X86-SSE2-NEXT:    pushl %ebp
1127; X86-SSE2-NEXT:    movl %esp, %ebp
1128; X86-SSE2-NEXT:    andl $-16, %esp
1129; X86-SSE2-NEXT:    subl $16, %esp
1130; X86-SSE2-NEXT:    por %xmm2, %xmm0
1131; X86-SSE2-NEXT:    por 8(%ebp), %xmm1
1132; X86-SSE2-NEXT:    por %xmm0, %xmm1
1133; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
1134; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
1135; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1136; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1137; X86-SSE2-NEXT:    sete %al
1138; X86-SSE2-NEXT:    movl %ebp, %esp
1139; X86-SSE2-NEXT:    popl %ebp
1140; X86-SSE2-NEXT:    retl
1141;
1142; X64-SSE2-LABEL: icmp0_v64i8_v64i1:
1143; X64-SSE2:       # %bb.0:
1144; X64-SSE2-NEXT:    por %xmm3, %xmm1
1145; X64-SSE2-NEXT:    por %xmm2, %xmm0
1146; X64-SSE2-NEXT:    por %xmm1, %xmm0
1147; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
1148; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1149; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
1150; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1151; X64-SSE2-NEXT:    sete %al
1152; X64-SSE2-NEXT:    retq
1153;
1154; SSE41-LABEL: icmp0_v64i8_v64i1:
1155; SSE41:       # %bb.0:
1156; SSE41-NEXT:    por %xmm3, %xmm1
1157; SSE41-NEXT:    por %xmm2, %xmm0
1158; SSE41-NEXT:    por %xmm1, %xmm0
1159; SSE41-NEXT:    ptest %xmm0, %xmm0
1160; SSE41-NEXT:    sete %al
1161; SSE41-NEXT:    retq
1162;
1163; AVX1-LABEL: icmp0_v64i8_v64i1:
1164; AVX1:       # %bb.0:
1165; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
1166; AVX1-NEXT:    vptest %ymm0, %ymm0
1167; AVX1-NEXT:    sete %al
1168; AVX1-NEXT:    vzeroupper
1169; AVX1-NEXT:    retq
1170;
1171; AVX2-LABEL: icmp0_v64i8_v64i1:
1172; AVX2:       # %bb.0:
1173; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1174; AVX2-NEXT:    vptest %ymm0, %ymm0
1175; AVX2-NEXT:    sete %al
1176; AVX2-NEXT:    vzeroupper
1177; AVX2-NEXT:    retq
1178;
1179; AVX512-LABEL: icmp0_v64i8_v64i1:
1180; AVX512:       # %bb.0:
1181; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
1182; AVX512-NEXT:    kortestw %k0, %k0
1183; AVX512-NEXT:    sete %al
1184; AVX512-NEXT:    vzeroupper
1185; AVX512-NEXT:    retq
1186  %a = icmp eq <64 x i8> %0, zeroinitializer
1187  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
1188  ret i1 %b
1189}
1190
; Bool-vector variant: the <8 x i8> argument is first truncated to <8 x i1>,
; those i1 lanes are compared with zero, and the comparison results are
; AND-reduced. Unlike the other functions in this group the i1 result is
; zero-extended and returned as i8, so the return value is 1 only when every
; truncated lane is 0 and 0 otherwise.
1191define i8 @icmp0_v8i1(<8 x i8>) nounwind {
1192; SSE2-LABEL: icmp0_v8i1:
1193; SSE2:       # %bb.0:
1194; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1195; SSE2-NEXT:    psllw $15, %xmm0
1196; SSE2-NEXT:    pmovmskb %xmm0, %eax
1197; SSE2-NEXT:    testl $43690, %eax # imm = 0xAAAA
1198; SSE2-NEXT:    sete %al
1199; SSE2-NEXT:    ret{{[l|q]}}
1200;
1201; SSE41-LABEL: icmp0_v8i1:
1202; SSE41:       # %bb.0:
1203; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1204; SSE41-NEXT:    psllw $15, %xmm0
1205; SSE41-NEXT:    pmovmskb %xmm0, %eax
1206; SSE41-NEXT:    testl $43690, %eax # imm = 0xAAAA
1207; SSE41-NEXT:    sete %al
1208; SSE41-NEXT:    retq
1209;
1210; AVX1OR2-LABEL: icmp0_v8i1:
1211; AVX1OR2:       # %bb.0:
1212; AVX1OR2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1213; AVX1OR2-NEXT:    vpsllw $15, %xmm0, %xmm0
1214; AVX1OR2-NEXT:    vpmovmskb %xmm0, %eax
1215; AVX1OR2-NEXT:    testl $43690, %eax # imm = 0xAAAA
1216; AVX1OR2-NEXT:    sete %al
1217; AVX1OR2-NEXT:    retq
1218;
1219; AVX512F-LABEL: icmp0_v8i1:
1220; AVX512F:       # %bb.0:
1221; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1222; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1223; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1224; AVX512F-NEXT:    kmovw %k0, %eax
1225; AVX512F-NEXT:    testb %al, %al
1226; AVX512F-NEXT:    sete %al
1227; AVX512F-NEXT:    vzeroupper
1228; AVX512F-NEXT:    retq
1229;
1230; AVX512BW-LABEL: icmp0_v8i1:
1231; AVX512BW:       # %bb.0:
1232; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
1233; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
1234; AVX512BW-NEXT:    kmovd %k0, %eax
1235; AVX512BW-NEXT:    testb %al, %al
1236; AVX512BW-NEXT:    sete %al
1237; AVX512BW-NEXT:    vzeroupper
1238; AVX512BW-NEXT:    retq
1239;
1240; AVX512VL-LABEL: icmp0_v8i1:
1241; AVX512VL:       # %bb.0:
1242; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
1243; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
1244; AVX512VL-NEXT:    kmovd %k0, %eax
1245; AVX512VL-NEXT:    testb %al, %al
1246; AVX512VL-NEXT:    sete %al
1247; AVX512VL-NEXT:    retq
1248  %a = trunc <8 x i8> %0 to <8 x i1>
1249  %b = icmp eq <8 x i1> %a, zeroinitializer
1250  %c = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %b)
1251  %d = zext i1 %c to i8
1252  ret i8 %d
1253}
1254
1255;
1256; Comparison With All Ones
1257;
1258
; All-lanes-all-ones test for <2 x i64>: compares the argument with a splat of
; -1 and AND-reduces the 2 i1 results, so the function returns true only when
; every lane equals -1 (all bits set).
1259define i1 @icmp1_v2i64_v2i1(<2 x i64>) nounwind {
1260; SSE2-LABEL: icmp1_v2i64_v2i1:
1261; SSE2:       # %bb.0:
1262; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1263; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1264; SSE2-NEXT:    movmskps %xmm1, %eax
1265; SSE2-NEXT:    xorl $15, %eax
1266; SSE2-NEXT:    sete %al
1267; SSE2-NEXT:    ret{{[l|q]}}
1268;
1269; SSE41-LABEL: icmp1_v2i64_v2i1:
1270; SSE41:       # %bb.0:
1271; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1272; SSE41-NEXT:    ptest %xmm1, %xmm0
1273; SSE41-NEXT:    setb %al
1274; SSE41-NEXT:    retq
1275;
1276; AVX-LABEL: icmp1_v2i64_v2i1:
1277; AVX:       # %bb.0:
1278; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1279; AVX-NEXT:    vptest %xmm1, %xmm0
1280; AVX-NEXT:    setb %al
1281; AVX-NEXT:    retq
1282  %a = icmp eq <2 x i64> %0, <i64 -1, i64 -1>
1283  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
1284  ret i1 %b
1285}
1286
; All-lanes-all-ones test for <4 x i32>: compares the argument with a splat of
; -1 and AND-reduces the 4 i1 results, so the function returns true only when
; every lane equals -1 (all bits set).
1287define i1 @icmp1_v4i32_v4i1(<4 x i32>) nounwind {
1288; SSE2-LABEL: icmp1_v4i32_v4i1:
1289; SSE2:       # %bb.0:
1290; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1291; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1292; SSE2-NEXT:    movmskps %xmm1, %eax
1293; SSE2-NEXT:    xorl $15, %eax
1294; SSE2-NEXT:    sete %al
1295; SSE2-NEXT:    ret{{[l|q]}}
1296;
1297; SSE41-LABEL: icmp1_v4i32_v4i1:
1298; SSE41:       # %bb.0:
1299; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1300; SSE41-NEXT:    ptest %xmm1, %xmm0
1301; SSE41-NEXT:    setb %al
1302; SSE41-NEXT:    retq
1303;
1304; AVX-LABEL: icmp1_v4i32_v4i1:
1305; AVX:       # %bb.0:
1306; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1307; AVX-NEXT:    vptest %xmm1, %xmm0
1308; AVX-NEXT:    setb %al
1309; AVX-NEXT:    retq
1310  %a = icmp eq <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
1311  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
1312  ret i1 %b
1313}
1314
; All-lanes-all-ones test for <8 x i16>: compares the argument with a splat of
; -1 and AND-reduces the 8 i1 results, so the function returns true only when
; every lane equals -1 (all bits set).
1315define i1 @icmp1_v8i16_v8i1(<8 x i16>) nounwind {
1316; SSE2-LABEL: icmp1_v8i16_v8i1:
1317; SSE2:       # %bb.0:
1318; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1319; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1320; SSE2-NEXT:    pmovmskb %xmm1, %eax
1321; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1322; SSE2-NEXT:    sete %al
1323; SSE2-NEXT:    ret{{[l|q]}}
1324;
1325; SSE41-LABEL: icmp1_v8i16_v8i1:
1326; SSE41:       # %bb.0:
1327; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1328; SSE41-NEXT:    ptest %xmm1, %xmm0
1329; SSE41-NEXT:    setb %al
1330; SSE41-NEXT:    retq
1331;
1332; AVX-LABEL: icmp1_v8i16_v8i1:
1333; AVX:       # %bb.0:
1334; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1335; AVX-NEXT:    vptest %xmm1, %xmm0
1336; AVX-NEXT:    setb %al
1337; AVX-NEXT:    retq
1338  %a = icmp eq <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1339  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
1340  ret i1 %b
1341}
1342
; All-lanes-all-ones test for <16 x i8>: compares the argument with a splat of
; -1 and AND-reduces the 16 i1 results, so the function returns true only when
; every byte lane equals -1 (all bits set).
1343define i1 @icmp1_v16i8_v16i1(<16 x i8>) nounwind {
1344; SSE2-LABEL: icmp1_v16i8_v16i1:
1345; SSE2:       # %bb.0:
1346; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1347; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1348; SSE2-NEXT:    pmovmskb %xmm1, %eax
1349; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1350; SSE2-NEXT:    sete %al
1351; SSE2-NEXT:    ret{{[l|q]}}
1352;
1353; SSE41-LABEL: icmp1_v16i8_v16i1:
1354; SSE41:       # %bb.0:
1355; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1356; SSE41-NEXT:    ptest %xmm1, %xmm0
1357; SSE41-NEXT:    setb %al
1358; SSE41-NEXT:    retq
1359;
1360; AVX-LABEL: icmp1_v16i8_v16i1:
1361; AVX:       # %bb.0:
1362; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1363; AVX-NEXT:    vptest %xmm1, %xmm0
1364; AVX-NEXT:    setb %al
1365; AVX-NEXT:    retq
1366  %a = icmp eq <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1367  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
1368  ret i1 %b
1369}
1370
; All-lanes-all-ones test for <4 x i64>: compares the argument with a splat of
; -1 and AND-reduces the 4 i1 results, so the function returns true only when
; every lane equals -1. Where the argument occupies two xmm registers, the
; expected code ANDs them together first so one all-ones test covers both.
1371define i1 @icmp1_v4i64_v4i1(<4 x i64>) nounwind {
1372; SSE2-LABEL: icmp1_v4i64_v4i1:
1373; SSE2:       # %bb.0:
1374; SSE2-NEXT:    pand %xmm1, %xmm0
1375; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1376; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1377; SSE2-NEXT:    movmskps %xmm1, %eax
1378; SSE2-NEXT:    xorl $15, %eax
1379; SSE2-NEXT:    sete %al
1380; SSE2-NEXT:    ret{{[l|q]}}
1381;
1382; SSE41-LABEL: icmp1_v4i64_v4i1:
1383; SSE41:       # %bb.0:
1384; SSE41-NEXT:    pand %xmm1, %xmm0
1385; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1386; SSE41-NEXT:    ptest %xmm1, %xmm0
1387; SSE41-NEXT:    setb %al
1388; SSE41-NEXT:    retq
1389;
1390; AVX1-LABEL: icmp1_v4i64_v4i1:
1391; AVX1:       # %bb.0:
1392; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1393; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1394; AVX1-NEXT:    vptest %ymm1, %ymm0
1395; AVX1-NEXT:    setb %al
1396; AVX1-NEXT:    vzeroupper
1397; AVX1-NEXT:    retq
1398;
1399; AVX2-LABEL: icmp1_v4i64_v4i1:
1400; AVX2:       # %bb.0:
1401; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1402; AVX2-NEXT:    vptest %ymm1, %ymm0
1403; AVX2-NEXT:    setb %al
1404; AVX2-NEXT:    vzeroupper
1405; AVX2-NEXT:    retq
1406;
1407; AVX512-LABEL: icmp1_v4i64_v4i1:
1408; AVX512:       # %bb.0:
1409; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1410; AVX512-NEXT:    vptest %ymm1, %ymm0
1411; AVX512-NEXT:    setb %al
1412; AVX512-NEXT:    vzeroupper
1413; AVX512-NEXT:    retq
1414  %a = icmp eq <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
1415  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
1416  ret i1 %b
1417}
1418
; All-lanes-all-ones test for <8 x i32>: compares the argument with a splat of
; -1 and AND-reduces the 8 i1 results, so the function returns true only when
; every lane equals -1. Where the argument occupies two xmm registers, the
; expected code ANDs them together first so one all-ones test covers both.
1419define i1 @icmp1_v8i32_v8i1(<8 x i32>) nounwind {
1420; SSE2-LABEL: icmp1_v8i32_v8i1:
1421; SSE2:       # %bb.0:
1422; SSE2-NEXT:    pand %xmm1, %xmm0
1423; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1424; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1425; SSE2-NEXT:    movmskps %xmm1, %eax
1426; SSE2-NEXT:    xorl $15, %eax
1427; SSE2-NEXT:    sete %al
1428; SSE2-NEXT:    ret{{[l|q]}}
1429;
1430; SSE41-LABEL: icmp1_v8i32_v8i1:
1431; SSE41:       # %bb.0:
1432; SSE41-NEXT:    pand %xmm1, %xmm0
1433; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1434; SSE41-NEXT:    ptest %xmm1, %xmm0
1435; SSE41-NEXT:    setb %al
1436; SSE41-NEXT:    retq
1437;
1438; AVX1-LABEL: icmp1_v8i32_v8i1:
1439; AVX1:       # %bb.0:
1440; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1441; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1442; AVX1-NEXT:    vptest %ymm1, %ymm0
1443; AVX1-NEXT:    setb %al
1444; AVX1-NEXT:    vzeroupper
1445; AVX1-NEXT:    retq
1446;
1447; AVX2-LABEL: icmp1_v8i32_v8i1:
1448; AVX2:       # %bb.0:
1449; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1450; AVX2-NEXT:    vptest %ymm1, %ymm0
1451; AVX2-NEXT:    setb %al
1452; AVX2-NEXT:    vzeroupper
1453; AVX2-NEXT:    retq
1454;
1455; AVX512-LABEL: icmp1_v8i32_v8i1:
1456; AVX512:       # %bb.0:
1457; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1458; AVX512-NEXT:    vptest %ymm1, %ymm0
1459; AVX512-NEXT:    setb %al
1460; AVX512-NEXT:    vzeroupper
1461; AVX512-NEXT:    retq
1462  %a = icmp eq <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
1463  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
1464  ret i1 %b
1465}
1466
; All-lanes-all-ones test for <16 x i16>: compares the argument with a splat of
; -1 and AND-reduces the 16 i1 results, so the function returns true only when
; every lane equals -1. Where the argument occupies two xmm registers, the
; expected code ANDs them together first so one all-ones test covers both.
1467define i1 @icmp1_v16i16_v16i1(<16 x i16>) nounwind {
1468; SSE2-LABEL: icmp1_v16i16_v16i1:
1469; SSE2:       # %bb.0:
1470; SSE2-NEXT:    pand %xmm1, %xmm0
1471; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1472; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1473; SSE2-NEXT:    pmovmskb %xmm1, %eax
1474; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1475; SSE2-NEXT:    sete %al
1476; SSE2-NEXT:    ret{{[l|q]}}
1477;
1478; SSE41-LABEL: icmp1_v16i16_v16i1:
1479; SSE41:       # %bb.0:
1480; SSE41-NEXT:    pand %xmm1, %xmm0
1481; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1482; SSE41-NEXT:    ptest %xmm1, %xmm0
1483; SSE41-NEXT:    setb %al
1484; SSE41-NEXT:    retq
1485;
1486; AVX1-LABEL: icmp1_v16i16_v16i1:
1487; AVX1:       # %bb.0:
1488; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1489; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1490; AVX1-NEXT:    vptest %ymm1, %ymm0
1491; AVX1-NEXT:    setb %al
1492; AVX1-NEXT:    vzeroupper
1493; AVX1-NEXT:    retq
1494;
1495; AVX2-LABEL: icmp1_v16i16_v16i1:
1496; AVX2:       # %bb.0:
1497; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1498; AVX2-NEXT:    vptest %ymm1, %ymm0
1499; AVX2-NEXT:    setb %al
1500; AVX2-NEXT:    vzeroupper
1501; AVX2-NEXT:    retq
1502;
1503; AVX512-LABEL: icmp1_v16i16_v16i1:
1504; AVX512:       # %bb.0:
1505; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1506; AVX512-NEXT:    vptest %ymm1, %ymm0
1507; AVX512-NEXT:    setb %al
1508; AVX512-NEXT:    vzeroupper
1509; AVX512-NEXT:    retq
1510  %a = icmp eq <16 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1511  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
1512  ret i1 %b
1513}
1514
; All-lanes-all-ones test for <32 x i8>: compares the argument with a splat of
; -1 and AND-reduces the 32 i1 results, so the function returns true only when
; every byte lane equals -1. Where the argument occupies two xmm registers, the
; expected code ANDs them together first so one all-ones test covers both.
1515define i1 @icmp1_v32i8_v32i1(<32 x i8>) nounwind {
1516; SSE2-LABEL: icmp1_v32i8_v32i1:
1517; SSE2:       # %bb.0:
1518; SSE2-NEXT:    pand %xmm1, %xmm0
1519; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1520; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1521; SSE2-NEXT:    pmovmskb %xmm1, %eax
1522; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1523; SSE2-NEXT:    sete %al
1524; SSE2-NEXT:    ret{{[l|q]}}
1525;
1526; SSE41-LABEL: icmp1_v32i8_v32i1:
1527; SSE41:       # %bb.0:
1528; SSE41-NEXT:    pand %xmm1, %xmm0
1529; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1530; SSE41-NEXT:    ptest %xmm1, %xmm0
1531; SSE41-NEXT:    setb %al
1532; SSE41-NEXT:    retq
1533;
1534; AVX1-LABEL: icmp1_v32i8_v32i1:
1535; AVX1:       # %bb.0:
1536; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1537; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1538; AVX1-NEXT:    vptest %ymm1, %ymm0
1539; AVX1-NEXT:    setb %al
1540; AVX1-NEXT:    vzeroupper
1541; AVX1-NEXT:    retq
1542;
1543; AVX2-LABEL: icmp1_v32i8_v32i1:
1544; AVX2:       # %bb.0:
1545; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1546; AVX2-NEXT:    vptest %ymm1, %ymm0
1547; AVX2-NEXT:    setb %al
1548; AVX2-NEXT:    vzeroupper
1549; AVX2-NEXT:    retq
1550;
1551; AVX512-LABEL: icmp1_v32i8_v32i1:
1552; AVX512:       # %bb.0:
1553; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1554; AVX512-NEXT:    vptest %ymm1, %ymm0
1555; AVX512-NEXT:    setb %al
1556; AVX512-NEXT:    vzeroupper
1557; AVX512-NEXT:    retq
1558  %a = icmp eq <32 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1559  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
1560  ret i1 %b
1561}
1562
; All-lanes-all-ones test for <8 x i64>: compares the argument with a splat of
; -1 and AND-reduces the 8 i1 results, so the function returns true only when
; every lane equals -1. Where the argument is split over several vector
; registers, the expected code ANDs the pieces together before one all-ones
; test; in the i686 run one piece is read from the stack (8(%ebp)).
1563define i1 @icmp1_v8i64_v8i1(<8 x i64>) nounwind {
1564; X86-SSE2-LABEL: icmp1_v8i64_v8i1:
1565; X86-SSE2:       # %bb.0:
1566; X86-SSE2-NEXT:    pushl %ebp
1567; X86-SSE2-NEXT:    movl %esp, %ebp
1568; X86-SSE2-NEXT:    andl $-16, %esp
1569; X86-SSE2-NEXT:    subl $16, %esp
1570; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1571; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
1572; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1573; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1574; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1575; X86-SSE2-NEXT:    movmskps %xmm0, %eax
1576; X86-SSE2-NEXT:    xorl $15, %eax
1577; X86-SSE2-NEXT:    sete %al
1578; X86-SSE2-NEXT:    movl %ebp, %esp
1579; X86-SSE2-NEXT:    popl %ebp
1580; X86-SSE2-NEXT:    retl
1581;
1582; X64-SSE2-LABEL: icmp1_v8i64_v8i1:
1583; X64-SSE2:       # %bb.0:
1584; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1585; X64-SSE2-NEXT:    pand %xmm2, %xmm0
1586; X64-SSE2-NEXT:    pand %xmm1, %xmm0
1587; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1588; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1589; X64-SSE2-NEXT:    movmskps %xmm1, %eax
1590; X64-SSE2-NEXT:    xorl $15, %eax
1591; X64-SSE2-NEXT:    sete %al
1592; X64-SSE2-NEXT:    retq
1593;
1594; SSE41-LABEL: icmp1_v8i64_v8i1:
1595; SSE41:       # %bb.0:
1596; SSE41-NEXT:    pand %xmm3, %xmm1
1597; SSE41-NEXT:    pand %xmm2, %xmm0
1598; SSE41-NEXT:    pand %xmm1, %xmm0
1599; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1600; SSE41-NEXT:    ptest %xmm1, %xmm0
1601; SSE41-NEXT:    setb %al
1602; SSE41-NEXT:    retq
1603;
1604; AVX1-LABEL: icmp1_v8i64_v8i1:
1605; AVX1:       # %bb.0:
1606; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1607; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1608; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1609; AVX1-NEXT:    vptest %ymm1, %ymm0
1610; AVX1-NEXT:    setb %al
1611; AVX1-NEXT:    vzeroupper
1612; AVX1-NEXT:    retq
1613;
1614; AVX2-LABEL: icmp1_v8i64_v8i1:
1615; AVX2:       # %bb.0:
1616; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
1617; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1618; AVX2-NEXT:    vptest %ymm1, %ymm0
1619; AVX2-NEXT:    setb %al
1620; AVX2-NEXT:    vzeroupper
1621; AVX2-NEXT:    retq
1622;
1623; AVX512-LABEL: icmp1_v8i64_v8i1:
1624; AVX512:       # %bb.0:
1625; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
1626; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
1627; AVX512-NEXT:    kortestw %k0, %k0
1628; AVX512-NEXT:    sete %al
1629; AVX512-NEXT:    vzeroupper
1630; AVX512-NEXT:    retq
1631  %a = icmp eq <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
1632  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
1633  ret i1 %b
1634}
1635
; All-lanes-all-ones test for <16 x i32>: compares the argument with a splat of
; -1 and AND-reduces the 16 i1 results, so the function returns true only when
; every lane equals -1. Where the argument is split over several vector
; registers, the expected code ANDs the pieces together before one all-ones
; test; in the i686 run one piece is read from the stack (8(%ebp)).
1636define i1 @icmp1_v16i32_v16i1(<16 x i32>) nounwind {
1637; X86-SSE2-LABEL: icmp1_v16i32_v16i1:
1638; X86-SSE2:       # %bb.0:
1639; X86-SSE2-NEXT:    pushl %ebp
1640; X86-SSE2-NEXT:    movl %esp, %ebp
1641; X86-SSE2-NEXT:    andl $-16, %esp
1642; X86-SSE2-NEXT:    subl $16, %esp
1643; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1644; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
1645; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1646; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1647; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1648; X86-SSE2-NEXT:    movmskps %xmm0, %eax
1649; X86-SSE2-NEXT:    xorl $15, %eax
1650; X86-SSE2-NEXT:    sete %al
1651; X86-SSE2-NEXT:    movl %ebp, %esp
1652; X86-SSE2-NEXT:    popl %ebp
1653; X86-SSE2-NEXT:    retl
1654;
1655; X64-SSE2-LABEL: icmp1_v16i32_v16i1:
1656; X64-SSE2:       # %bb.0:
1657; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1658; X64-SSE2-NEXT:    pand %xmm2, %xmm0
1659; X64-SSE2-NEXT:    pand %xmm1, %xmm0
1660; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1661; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1662; X64-SSE2-NEXT:    movmskps %xmm1, %eax
1663; X64-SSE2-NEXT:    xorl $15, %eax
1664; X64-SSE2-NEXT:    sete %al
1665; X64-SSE2-NEXT:    retq
1666;
1667; SSE41-LABEL: icmp1_v16i32_v16i1:
1668; SSE41:       # %bb.0:
1669; SSE41-NEXT:    pand %xmm3, %xmm1
1670; SSE41-NEXT:    pand %xmm2, %xmm0
1671; SSE41-NEXT:    pand %xmm1, %xmm0
1672; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1673; SSE41-NEXT:    ptest %xmm1, %xmm0
1674; SSE41-NEXT:    setb %al
1675; SSE41-NEXT:    retq
1676;
1677; AVX1-LABEL: icmp1_v16i32_v16i1:
1678; AVX1:       # %bb.0:
1679; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1680; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1681; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1682; AVX1-NEXT:    vptest %ymm1, %ymm0
1683; AVX1-NEXT:    setb %al
1684; AVX1-NEXT:    vzeroupper
1685; AVX1-NEXT:    retq
1686;
1687; AVX2-LABEL: icmp1_v16i32_v16i1:
1688; AVX2:       # %bb.0:
1689; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
1690; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1691; AVX2-NEXT:    vptest %ymm1, %ymm0
1692; AVX2-NEXT:    setb %al
1693; AVX2-NEXT:    vzeroupper
1694; AVX2-NEXT:    retq
1695;
1696; AVX512-LABEL: icmp1_v16i32_v16i1:
1697; AVX512:       # %bb.0:
1698; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
1699; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
1700; AVX512-NEXT:    kortestw %k0, %k0
1701; AVX512-NEXT:    sete %al
1702; AVX512-NEXT:    vzeroupper
1703; AVX512-NEXT:    retq
1704  %a = icmp eq <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
1705  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
1706  ret i1 %b
1707}
1708
; All-lanes-all-ones test for <32 x i16>: compares the argument with a splat of
; -1 and AND-reduces the 32 i1 results, so the function returns true only when
; every lane equals -1. Where the argument is split over several vector
; registers, the expected code ANDs the pieces together before one all-ones
; test; in the i686 run one piece is read from the stack (8(%ebp)).
1709define i1 @icmp1_v32i16_v32i1(<32 x i16>) nounwind {
1710; X86-SSE2-LABEL: icmp1_v32i16_v32i1:
1711; X86-SSE2:       # %bb.0:
1712; X86-SSE2-NEXT:    pushl %ebp
1713; X86-SSE2-NEXT:    movl %esp, %ebp
1714; X86-SSE2-NEXT:    andl $-16, %esp
1715; X86-SSE2-NEXT:    subl $16, %esp
1716; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1717; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
1718; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1719; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1720; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
1721; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1722; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1723; X86-SSE2-NEXT:    sete %al
1724; X86-SSE2-NEXT:    movl %ebp, %esp
1725; X86-SSE2-NEXT:    popl %ebp
1726; X86-SSE2-NEXT:    retl
1727;
1728; X64-SSE2-LABEL: icmp1_v32i16_v32i1:
1729; X64-SSE2:       # %bb.0:
1730; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1731; X64-SSE2-NEXT:    pand %xmm2, %xmm0
1732; X64-SSE2-NEXT:    pand %xmm1, %xmm0
1733; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1734; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1735; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
1736; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1737; X64-SSE2-NEXT:    sete %al
1738; X64-SSE2-NEXT:    retq
1739;
1740; SSE41-LABEL: icmp1_v32i16_v32i1:
1741; SSE41:       # %bb.0:
1742; SSE41-NEXT:    pand %xmm3, %xmm1
1743; SSE41-NEXT:    pand %xmm2, %xmm0
1744; SSE41-NEXT:    pand %xmm1, %xmm0
1745; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1746; SSE41-NEXT:    ptest %xmm1, %xmm0
1747; SSE41-NEXT:    setb %al
1748; SSE41-NEXT:    retq
1749;
1750; AVX1-LABEL: icmp1_v32i16_v32i1:
1751; AVX1:       # %bb.0:
1752; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1753; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1754; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1755; AVX1-NEXT:    vptest %ymm1, %ymm0
1756; AVX1-NEXT:    setb %al
1757; AVX1-NEXT:    vzeroupper
1758; AVX1-NEXT:    retq
1759;
1760; AVX2-LABEL: icmp1_v32i16_v32i1:
1761; AVX2:       # %bb.0:
1762; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
1763; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1764; AVX2-NEXT:    vptest %ymm1, %ymm0
1765; AVX2-NEXT:    setb %al
1766; AVX2-NEXT:    vzeroupper
1767; AVX2-NEXT:    retq
1768;
1769; AVX512-LABEL: icmp1_v32i16_v32i1:
1770; AVX512:       # %bb.0:
1771; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
1772; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
1773; AVX512-NEXT:    kortestw %k0, %k0
1774; AVX512-NEXT:    sete %al
1775; AVX512-NEXT:    vzeroupper
1776; AVX512-NEXT:    retq
1777  %a = icmp eq <32 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1778  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
1779  ret i1 %b
1780}
1781
1782define i1 @icmp1_v64i8_v64i1(<64 x i8>) nounwind {
1783; X86-SSE2-LABEL: icmp1_v64i8_v64i1:
1784; X86-SSE2:       # %bb.0:
1785; X86-SSE2-NEXT:    pushl %ebp
1786; X86-SSE2-NEXT:    movl %esp, %ebp
1787; X86-SSE2-NEXT:    andl $-16, %esp
1788; X86-SSE2-NEXT:    subl $16, %esp
1789; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1790; X86-SSE2-NEXT:    pand 8(%ebp), %xmm1
1791; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1792; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1793; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
1794; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
1795; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1796; X86-SSE2-NEXT:    sete %al
1797; X86-SSE2-NEXT:    movl %ebp, %esp
1798; X86-SSE2-NEXT:    popl %ebp
1799; X86-SSE2-NEXT:    retl
1800;
1801; X64-SSE2-LABEL: icmp1_v64i8_v64i1:
1802; X64-SSE2:       # %bb.0:
1803; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1804; X64-SSE2-NEXT:    pand %xmm2, %xmm0
1805; X64-SSE2-NEXT:    pand %xmm1, %xmm0
1806; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1807; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
1808; X64-SSE2-NEXT:    pmovmskb %xmm1, %eax
1809; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1810; X64-SSE2-NEXT:    sete %al
1811; X64-SSE2-NEXT:    retq
1812;
1813; SSE41-LABEL: icmp1_v64i8_v64i1:
1814; SSE41:       # %bb.0:
1815; SSE41-NEXT:    pand %xmm3, %xmm1
1816; SSE41-NEXT:    pand %xmm2, %xmm0
1817; SSE41-NEXT:    pand %xmm1, %xmm0
1818; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
1819; SSE41-NEXT:    ptest %xmm1, %xmm0
1820; SSE41-NEXT:    setb %al
1821; SSE41-NEXT:    retq
1822;
1823; AVX1-LABEL: icmp1_v64i8_v64i1:
1824; AVX1:       # %bb.0:
1825; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1826; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1827; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
1828; AVX1-NEXT:    vptest %ymm1, %ymm0
1829; AVX1-NEXT:    setb %al
1830; AVX1-NEXT:    vzeroupper
1831; AVX1-NEXT:    retq
1832;
1833; AVX2-LABEL: icmp1_v64i8_v64i1:
1834; AVX2:       # %bb.0:
1835; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
1836; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1837; AVX2-NEXT:    vptest %ymm1, %ymm0
1838; AVX2-NEXT:    setb %al
1839; AVX2-NEXT:    vzeroupper
1840; AVX2-NEXT:    retq
1841;
1842; AVX512-LABEL: icmp1_v64i8_v64i1:
1843; AVX512:       # %bb.0:
1844; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
1845; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
1846; AVX512-NEXT:    kortestw %k0, %k0
1847; AVX512-NEXT:    sete %al
1848; AVX512-NEXT:    vzeroupper
1849; AVX512-NEXT:    retq
1850  %a = icmp eq <64 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1851  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
1852  ret i1 %b
1853}
1854
; Truncates each i8 lane to i1, compares the <8 x i1> against all-true,
; AND-reduces the comparison and zero-extends the resulting i1 to the i8
; return value. Every expectation below finishes by comparing %al with -1
; (cmpb $-1, %al followed by sete %al).
1855define i8 @icmp1_v8i1(<8 x i8>) nounwind {
1856; SSE2-LABEL: icmp1_v8i1:
1857; SSE2:       # %bb.0:
1858; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1859; SSE2-NEXT:    psllw $15, %xmm0
1860; SSE2-NEXT:    packsswb %xmm0, %xmm0
1861; SSE2-NEXT:    pmovmskb %xmm0, %eax
1862; SSE2-NEXT:    cmpb $-1, %al
1863; SSE2-NEXT:    sete %al
1864; SSE2-NEXT:    ret{{[l|q]}}
1865;
1866; SSE41-LABEL: icmp1_v8i1:
1867; SSE41:       # %bb.0:
1868; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1869; SSE41-NEXT:    psllw $15, %xmm0
1870; SSE41-NEXT:    packsswb %xmm0, %xmm0
1871; SSE41-NEXT:    pmovmskb %xmm0, %eax
1872; SSE41-NEXT:    cmpb $-1, %al
1873; SSE41-NEXT:    sete %al
1874; SSE41-NEXT:    retq
1875;
1876; AVX1OR2-LABEL: icmp1_v8i1:
1877; AVX1OR2:       # %bb.0:
1878; AVX1OR2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1879; AVX1OR2-NEXT:    vpsllw $15, %xmm0, %xmm0
1880; AVX1OR2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1881; AVX1OR2-NEXT:    vpmovmskb %xmm0, %eax
1882; AVX1OR2-NEXT:    cmpb $-1, %al
1883; AVX1OR2-NEXT:    sete %al
1884; AVX1OR2-NEXT:    retq
1885;
1886; AVX512F-LABEL: icmp1_v8i1:
1887; AVX512F:       # %bb.0:
1888; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1889; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1890; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
1891; AVX512F-NEXT:    kmovw %k0, %eax
1892; AVX512F-NEXT:    cmpb $-1, %al
1893; AVX512F-NEXT:    sete %al
1894; AVX512F-NEXT:    vzeroupper
1895; AVX512F-NEXT:    retq
1896;
1897; AVX512BW-LABEL: icmp1_v8i1:
1898; AVX512BW:       # %bb.0:
1899; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
1900; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
1901; AVX512BW-NEXT:    kmovd %k0, %eax
1902; AVX512BW-NEXT:    cmpb $-1, %al
1903; AVX512BW-NEXT:    sete %al
1904; AVX512BW-NEXT:    vzeroupper
1905; AVX512BW-NEXT:    retq
1906;
1907; AVX512VL-LABEL: icmp1_v8i1:
1908; AVX512VL:       # %bb.0:
1909; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
1910; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
1911; AVX512VL-NEXT:    kmovd %k0, %eax
1912; AVX512VL-NEXT:    cmpb $-1, %al
1913; AVX512VL-NEXT:    sete %al
1914; AVX512VL-NEXT:    retq
1915  %a = trunc <8 x i8> %0 to <8 x i1>
1916  %b = icmp eq <8 x i1> %a, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
1917  %c = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %b)
1918  %d = zext i1 %c to i8
1919  ret i8 %d
1920}
1921
1922;
1923; Comparison
1924;
1925
; Returns true only when both i64 lanes of the two arguments are equal
; (icmp eq followed by an AND-reduction of the <2 x i1> result).
; The SSE2 expectation compares at dword granularity (pcmpeqd + movmskps, mask
; XORed with 15); the SSE4.1 and AVX expectations XOR the operands and then
; use (v)ptest + sete.
1926define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) nounwind {
1927; SSE2-LABEL: icmp_v2i64_v2i1:
1928; SSE2:       # %bb.0:
1929; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1930; SSE2-NEXT:    movmskps %xmm0, %eax
1931; SSE2-NEXT:    xorl $15, %eax
1932; SSE2-NEXT:    sete %al
1933; SSE2-NEXT:    ret{{[l|q]}}
1934;
1935; SSE41-LABEL: icmp_v2i64_v2i1:
1936; SSE41:       # %bb.0:
1937; SSE41-NEXT:    pxor %xmm1, %xmm0
1938; SSE41-NEXT:    ptest %xmm0, %xmm0
1939; SSE41-NEXT:    sete %al
1940; SSE41-NEXT:    retq
1941;
1942; AVX-LABEL: icmp_v2i64_v2i1:
1943; AVX:       # %bb.0:
1944; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1945; AVX-NEXT:    vptest %xmm0, %xmm0
1946; AVX-NEXT:    sete %al
1947; AVX-NEXT:    retq
1948  %a = icmp eq <2 x i64> %0, %1
1949  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
1950  ret i1 %b
1951}
1952
; Returns true only when all four i32 lanes of the two arguments are equal
; (icmp eq followed by an AND-reduction of the <4 x i1> result).
; The SSE2 expectation uses pcmpeqd + movmskps with the mask XORed with 15;
; the SSE4.1 and AVX expectations XOR the operands and use (v)ptest + sete.
1953define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) nounwind {
1954; SSE2-LABEL: icmp_v4i32_v4i1:
1955; SSE2:       # %bb.0:
1956; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
1957; SSE2-NEXT:    movmskps %xmm0, %eax
1958; SSE2-NEXT:    xorl $15, %eax
1959; SSE2-NEXT:    sete %al
1960; SSE2-NEXT:    ret{{[l|q]}}
1961;
1962; SSE41-LABEL: icmp_v4i32_v4i1:
1963; SSE41:       # %bb.0:
1964; SSE41-NEXT:    pxor %xmm1, %xmm0
1965; SSE41-NEXT:    ptest %xmm0, %xmm0
1966; SSE41-NEXT:    sete %al
1967; SSE41-NEXT:    retq
1968;
1969; AVX-LABEL: icmp_v4i32_v4i1:
1970; AVX:       # %bb.0:
1971; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1972; AVX-NEXT:    vptest %xmm0, %xmm0
1973; AVX-NEXT:    sete %al
1974; AVX-NEXT:    retq
1975  %a = icmp eq <4 x i32> %0, %1
1976  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
1977  ret i1 %b
1978}
1979
; Returns true only when all eight i16 lanes of the two arguments are equal
; (icmp eq followed by an AND-reduction of the <8 x i1> result).
; The SSE2 expectation compares at byte granularity (pcmpeqb + pmovmskb, mask
; XORed with 0xFFFF); the SSE4.1 and AVX expectations XOR the operands and
; use (v)ptest + sete.
1980define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) nounwind {
1981; SSE2-LABEL: icmp_v8i16_v8i1:
1982; SSE2:       # %bb.0:
1983; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
1984; SSE2-NEXT:    pmovmskb %xmm0, %eax
1985; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
1986; SSE2-NEXT:    sete %al
1987; SSE2-NEXT:    ret{{[l|q]}}
1988;
1989; SSE41-LABEL: icmp_v8i16_v8i1:
1990; SSE41:       # %bb.0:
1991; SSE41-NEXT:    pxor %xmm1, %xmm0
1992; SSE41-NEXT:    ptest %xmm0, %xmm0
1993; SSE41-NEXT:    sete %al
1994; SSE41-NEXT:    retq
1995;
1996; AVX-LABEL: icmp_v8i16_v8i1:
1997; AVX:       # %bb.0:
1998; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1999; AVX-NEXT:    vptest %xmm0, %xmm0
2000; AVX-NEXT:    sete %al
2001; AVX-NEXT:    retq
2002  %a = icmp eq <8 x i16> %0, %1
2003  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
2004  ret i1 %b
2005}
2006
; Returns true only when all sixteen i8 lanes of the two arguments are equal
; (icmp eq followed by an AND-reduction of the <16 x i1> result).
; The SSE2 expectation uses pcmpeqb + pmovmskb with the mask XORed with
; 0xFFFF; the SSE4.1 and AVX expectations XOR the operands and use
; (v)ptest + sete.
2007define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) nounwind {
2008; SSE2-LABEL: icmp_v16i8_v16i1:
2009; SSE2:       # %bb.0:
2010; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
2011; SSE2-NEXT:    pmovmskb %xmm0, %eax
2012; SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2013; SSE2-NEXT:    sete %al
2014; SSE2-NEXT:    ret{{[l|q]}}
2015;
2016; SSE41-LABEL: icmp_v16i8_v16i1:
2017; SSE41:       # %bb.0:
2018; SSE41-NEXT:    pxor %xmm1, %xmm0
2019; SSE41-NEXT:    ptest %xmm0, %xmm0
2020; SSE41-NEXT:    sete %al
2021; SSE41-NEXT:    retq
2022;
2023; AVX-LABEL: icmp_v16i8_v16i1:
2024; AVX:       # %bb.0:
2025; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
2026; AVX-NEXT:    vptest %xmm0, %xmm0
2027; AVX-NEXT:    sete %al
2028; AVX-NEXT:    retq
2029  %a = icmp eq <16 x i8> %0, %1
2030  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
2031  ret i1 %b
2032}
2033
; 256-bit case: returns true only when all four i64 lanes of the two arguments
; are equal (icmp eq followed by an AND-reduction of the <4 x i1> result).
; The SSE2 expectations compare each 128-bit half with pcmpeqd and AND the
; results (the i686 run reads one operand from 8(%ebp)); the SSE4.1 run ORs
; the XORed halves before ptest; the AVX runs XOR the ymm operands and use
; vptest + sete.
2034define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) nounwind {
2035; X86-SSE2-LABEL: icmp_v4i64_v4i1:
2036; X86-SSE2:       # %bb.0:
2037; X86-SSE2-NEXT:    pushl %ebp
2038; X86-SSE2-NEXT:    movl %esp, %ebp
2039; X86-SSE2-NEXT:    andl $-16, %esp
2040; X86-SSE2-NEXT:    subl $16, %esp
2041; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2042; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm1
2043; X86-SSE2-NEXT:    pand %xmm0, %xmm1
2044; X86-SSE2-NEXT:    movmskps %xmm1, %eax
2045; X86-SSE2-NEXT:    xorl $15, %eax
2046; X86-SSE2-NEXT:    sete %al
2047; X86-SSE2-NEXT:    movl %ebp, %esp
2048; X86-SSE2-NEXT:    popl %ebp
2049; X86-SSE2-NEXT:    retl
2050;
2051; X64-SSE2-LABEL: icmp_v4i64_v4i1:
2052; X64-SSE2:       # %bb.0:
2053; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
2054; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2055; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2056; X64-SSE2-NEXT:    movmskps %xmm0, %eax
2057; X64-SSE2-NEXT:    xorl $15, %eax
2058; X64-SSE2-NEXT:    sete %al
2059; X64-SSE2-NEXT:    retq
2060;
2061; SSE41-LABEL: icmp_v4i64_v4i1:
2062; SSE41:       # %bb.0:
2063; SSE41-NEXT:    pxor %xmm3, %xmm1
2064; SSE41-NEXT:    pxor %xmm2, %xmm0
2065; SSE41-NEXT:    por %xmm1, %xmm0
2066; SSE41-NEXT:    ptest %xmm0, %xmm0
2067; SSE41-NEXT:    sete %al
2068; SSE41-NEXT:    retq
2069;
2070; AVX1-LABEL: icmp_v4i64_v4i1:
2071; AVX1:       # %bb.0:
2072; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
2073; AVX1-NEXT:    vptest %ymm0, %ymm0
2074; AVX1-NEXT:    sete %al
2075; AVX1-NEXT:    vzeroupper
2076; AVX1-NEXT:    retq
2077;
2078; AVX2-LABEL: icmp_v4i64_v4i1:
2079; AVX2:       # %bb.0:
2080; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2081; AVX2-NEXT:    vptest %ymm0, %ymm0
2082; AVX2-NEXT:    sete %al
2083; AVX2-NEXT:    vzeroupper
2084; AVX2-NEXT:    retq
2085;
2086; AVX512-LABEL: icmp_v4i64_v4i1:
2087; AVX512:       # %bb.0:
2088; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2089; AVX512-NEXT:    vptest %ymm0, %ymm0
2090; AVX512-NEXT:    sete %al
2091; AVX512-NEXT:    vzeroupper
2092; AVX512-NEXT:    retq
2093  %a = icmp eq <4 x i64> %0, %1
2094  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
2095  ret i1 %b
2096}
2097
; 256-bit case: returns true only when all eight i32 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <8 x i1>
; result). The SSE2 expectations AND two pcmpeqd results before movmskps;
; the SSE4.1 run ORs the XORed halves before ptest; the AVX runs XOR the ymm
; operands and use vptest + sete.
2098define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) nounwind {
2099; X86-SSE2-LABEL: icmp_v8i32_v8i1:
2100; X86-SSE2:       # %bb.0:
2101; X86-SSE2-NEXT:    pushl %ebp
2102; X86-SSE2-NEXT:    movl %esp, %ebp
2103; X86-SSE2-NEXT:    andl $-16, %esp
2104; X86-SSE2-NEXT:    subl $16, %esp
2105; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2106; X86-SSE2-NEXT:    pcmpeqd 8(%ebp), %xmm1
2107; X86-SSE2-NEXT:    pand %xmm0, %xmm1
2108; X86-SSE2-NEXT:    movmskps %xmm1, %eax
2109; X86-SSE2-NEXT:    xorl $15, %eax
2110; X86-SSE2-NEXT:    sete %al
2111; X86-SSE2-NEXT:    movl %ebp, %esp
2112; X86-SSE2-NEXT:    popl %ebp
2113; X86-SSE2-NEXT:    retl
2114;
2115; X64-SSE2-LABEL: icmp_v8i32_v8i1:
2116; X64-SSE2:       # %bb.0:
2117; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
2118; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
2119; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2120; X64-SSE2-NEXT:    movmskps %xmm0, %eax
2121; X64-SSE2-NEXT:    xorl $15, %eax
2122; X64-SSE2-NEXT:    sete %al
2123; X64-SSE2-NEXT:    retq
2124;
2125; SSE41-LABEL: icmp_v8i32_v8i1:
2126; SSE41:       # %bb.0:
2127; SSE41-NEXT:    pxor %xmm3, %xmm1
2128; SSE41-NEXT:    pxor %xmm2, %xmm0
2129; SSE41-NEXT:    por %xmm1, %xmm0
2130; SSE41-NEXT:    ptest %xmm0, %xmm0
2131; SSE41-NEXT:    sete %al
2132; SSE41-NEXT:    retq
2133;
2134; AVX1-LABEL: icmp_v8i32_v8i1:
2135; AVX1:       # %bb.0:
2136; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
2137; AVX1-NEXT:    vptest %ymm0, %ymm0
2138; AVX1-NEXT:    sete %al
2139; AVX1-NEXT:    vzeroupper
2140; AVX1-NEXT:    retq
2141;
2142; AVX2-LABEL: icmp_v8i32_v8i1:
2143; AVX2:       # %bb.0:
2144; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2145; AVX2-NEXT:    vptest %ymm0, %ymm0
2146; AVX2-NEXT:    sete %al
2147; AVX2-NEXT:    vzeroupper
2148; AVX2-NEXT:    retq
2149;
2150; AVX512-LABEL: icmp_v8i32_v8i1:
2151; AVX512:       # %bb.0:
2152; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2153; AVX512-NEXT:    vptest %ymm0, %ymm0
2154; AVX512-NEXT:    sete %al
2155; AVX512-NEXT:    vzeroupper
2156; AVX512-NEXT:    retq
2157  %a = icmp eq <8 x i32> %0, %1
2158  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
2159  ret i1 %b
2160}
2161
; 256-bit case: returns true only when all sixteen i16 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <16 x i1>
; result). The SSE2 expectations AND two pcmpeqb results before pmovmskb and
; XOR the mask with 0xFFFF; the SSE4.1 run ORs the XORed halves before ptest;
; the AVX runs XOR the ymm operands and use vptest + sete.
2162define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) nounwind {
2163; X86-SSE2-LABEL: icmp_v16i16_v16i1:
2164; X86-SSE2:       # %bb.0:
2165; X86-SSE2-NEXT:    pushl %ebp
2166; X86-SSE2-NEXT:    movl %esp, %ebp
2167; X86-SSE2-NEXT:    andl $-16, %esp
2168; X86-SSE2-NEXT:    subl $16, %esp
2169; X86-SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
2170; X86-SSE2-NEXT:    pcmpeqb 8(%ebp), %xmm1
2171; X86-SSE2-NEXT:    pand %xmm0, %xmm1
2172; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
2173; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2174; X86-SSE2-NEXT:    sete %al
2175; X86-SSE2-NEXT:    movl %ebp, %esp
2176; X86-SSE2-NEXT:    popl %ebp
2177; X86-SSE2-NEXT:    retl
2178;
2179; X64-SSE2-LABEL: icmp_v16i16_v16i1:
2180; X64-SSE2:       # %bb.0:
2181; X64-SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
2182; X64-SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
2183; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2184; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
2185; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2186; X64-SSE2-NEXT:    sete %al
2187; X64-SSE2-NEXT:    retq
2188;
2189; SSE41-LABEL: icmp_v16i16_v16i1:
2190; SSE41:       # %bb.0:
2191; SSE41-NEXT:    pxor %xmm3, %xmm1
2192; SSE41-NEXT:    pxor %xmm2, %xmm0
2193; SSE41-NEXT:    por %xmm1, %xmm0
2194; SSE41-NEXT:    ptest %xmm0, %xmm0
2195; SSE41-NEXT:    sete %al
2196; SSE41-NEXT:    retq
2197;
2198; AVX1-LABEL: icmp_v16i16_v16i1:
2199; AVX1:       # %bb.0:
2200; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
2201; AVX1-NEXT:    vptest %ymm0, %ymm0
2202; AVX1-NEXT:    sete %al
2203; AVX1-NEXT:    vzeroupper
2204; AVX1-NEXT:    retq
2205;
2206; AVX2-LABEL: icmp_v16i16_v16i1:
2207; AVX2:       # %bb.0:
2208; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2209; AVX2-NEXT:    vptest %ymm0, %ymm0
2210; AVX2-NEXT:    sete %al
2211; AVX2-NEXT:    vzeroupper
2212; AVX2-NEXT:    retq
2213;
2214; AVX512-LABEL: icmp_v16i16_v16i1:
2215; AVX512:       # %bb.0:
2216; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2217; AVX512-NEXT:    vptest %ymm0, %ymm0
2218; AVX512-NEXT:    sete %al
2219; AVX512-NEXT:    vzeroupper
2220; AVX512-NEXT:    retq
2221  %a = icmp eq <16 x i16> %0, %1
2222  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
2223  ret i1 %b
2224}
2225
; 256-bit case: returns true only when all thirty-two i8 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <32 x i1>
; result). The SSE2 expectations AND two pcmpeqb results before pmovmskb and
; XOR the mask with 0xFFFF; the SSE4.1 run ORs the XORed halves before ptest;
; the AVX runs XOR the ymm operands and use vptest + sete.
2226define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) nounwind {
2227; X86-SSE2-LABEL: icmp_v32i8_v32i1:
2228; X86-SSE2:       # %bb.0:
2229; X86-SSE2-NEXT:    pushl %ebp
2230; X86-SSE2-NEXT:    movl %esp, %ebp
2231; X86-SSE2-NEXT:    andl $-16, %esp
2232; X86-SSE2-NEXT:    subl $16, %esp
2233; X86-SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
2234; X86-SSE2-NEXT:    pcmpeqb 8(%ebp), %xmm1
2235; X86-SSE2-NEXT:    pand %xmm0, %xmm1
2236; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
2237; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2238; X86-SSE2-NEXT:    sete %al
2239; X86-SSE2-NEXT:    movl %ebp, %esp
2240; X86-SSE2-NEXT:    popl %ebp
2241; X86-SSE2-NEXT:    retl
2242;
2243; X64-SSE2-LABEL: icmp_v32i8_v32i1:
2244; X64-SSE2:       # %bb.0:
2245; X64-SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
2246; X64-SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
2247; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2248; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
2249; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2250; X64-SSE2-NEXT:    sete %al
2251; X64-SSE2-NEXT:    retq
2252;
2253; SSE41-LABEL: icmp_v32i8_v32i1:
2254; SSE41:       # %bb.0:
2255; SSE41-NEXT:    pxor %xmm3, %xmm1
2256; SSE41-NEXT:    pxor %xmm2, %xmm0
2257; SSE41-NEXT:    por %xmm1, %xmm0
2258; SSE41-NEXT:    ptest %xmm0, %xmm0
2259; SSE41-NEXT:    sete %al
2260; SSE41-NEXT:    retq
2261;
2262; AVX1-LABEL: icmp_v32i8_v32i1:
2263; AVX1:       # %bb.0:
2264; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
2265; AVX1-NEXT:    vptest %ymm0, %ymm0
2266; AVX1-NEXT:    sete %al
2267; AVX1-NEXT:    vzeroupper
2268; AVX1-NEXT:    retq
2269;
2270; AVX2-LABEL: icmp_v32i8_v32i1:
2271; AVX2:       # %bb.0:
2272; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2273; AVX2-NEXT:    vptest %ymm0, %ymm0
2274; AVX2-NEXT:    sete %al
2275; AVX2-NEXT:    vzeroupper
2276; AVX2-NEXT:    retq
2277;
2278; AVX512-LABEL: icmp_v32i8_v32i1:
2279; AVX512:       # %bb.0:
2280; AVX512-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2281; AVX512-NEXT:    vptest %ymm0, %ymm0
2282; AVX512-NEXT:    sete %al
2283; AVX512-NEXT:    vzeroupper
2284; AVX512-NEXT:    retq
2285  %a = icmp eq <32 x i8> %0, %1
2286  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
2287  ret i1 %b
2288}
2289
; 512-bit case: returns true only when all eight i64 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <8 x i1>
; result). The SSE2 expectations AND four pcmpeqd results before movmskps
; (the i686 run reads operands from 8..72(%ebp)); the SSE4.1/AVX1/AVX2 runs
; OR the XORed pieces before (v)ptest; the AVX512 runs use a single vpcmpneqd
; on zmm registers followed by kortestw/sete.
2290define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) nounwind {
2291; X86-SSE2-LABEL: icmp_v8i64_v8i1:
2292; X86-SSE2:       # %bb.0:
2293; X86-SSE2-NEXT:    pushl %ebp
2294; X86-SSE2-NEXT:    movl %esp, %ebp
2295; X86-SSE2-NEXT:    andl $-16, %esp
2296; X86-SSE2-NEXT:    subl $16, %esp
2297; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2298; X86-SSE2-NEXT:    pcmpeqd 72(%ebp), %xmm3
2299; X86-SSE2-NEXT:    pcmpeqd 40(%ebp), %xmm1
2300; X86-SSE2-NEXT:    pand %xmm3, %xmm1
2301; X86-SSE2-NEXT:    pcmpeqd 56(%ebp), %xmm2
2302; X86-SSE2-NEXT:    pcmpeqd 24(%ebp), %xmm0
2303; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2304; X86-SSE2-NEXT:    pand %xmm1, %xmm0
2305; X86-SSE2-NEXT:    movmskps %xmm0, %eax
2306; X86-SSE2-NEXT:    xorl $15, %eax
2307; X86-SSE2-NEXT:    sete %al
2308; X86-SSE2-NEXT:    movl %ebp, %esp
2309; X86-SSE2-NEXT:    popl %ebp
2310; X86-SSE2-NEXT:    retl
2311;
2312; X64-SSE2-LABEL: icmp_v8i64_v8i1:
2313; X64-SSE2:       # %bb.0:
2314; X64-SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
2315; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
2316; X64-SSE2-NEXT:    pand %xmm3, %xmm1
2317; X64-SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
2318; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
2319; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2320; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2321; X64-SSE2-NEXT:    movmskps %xmm0, %eax
2322; X64-SSE2-NEXT:    xorl $15, %eax
2323; X64-SSE2-NEXT:    sete %al
2324; X64-SSE2-NEXT:    retq
2325;
2326; SSE41-LABEL: icmp_v8i64_v8i1:
2327; SSE41:       # %bb.0:
2328; SSE41-NEXT:    pxor %xmm7, %xmm3
2329; SSE41-NEXT:    pxor %xmm5, %xmm1
2330; SSE41-NEXT:    por %xmm3, %xmm1
2331; SSE41-NEXT:    pxor %xmm6, %xmm2
2332; SSE41-NEXT:    pxor %xmm4, %xmm0
2333; SSE41-NEXT:    por %xmm2, %xmm0
2334; SSE41-NEXT:    por %xmm1, %xmm0
2335; SSE41-NEXT:    ptest %xmm0, %xmm0
2336; SSE41-NEXT:    sete %al
2337; SSE41-NEXT:    retq
2338;
2339; AVX1-LABEL: icmp_v8i64_v8i1:
2340; AVX1:       # %bb.0:
2341; AVX1-NEXT:    vxorps %ymm3, %ymm1, %ymm1
2342; AVX1-NEXT:    vxorps %ymm2, %ymm0, %ymm0
2343; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
2344; AVX1-NEXT:    vptest %ymm0, %ymm0
2345; AVX1-NEXT:    sete %al
2346; AVX1-NEXT:    vzeroupper
2347; AVX1-NEXT:    retq
2348;
2349; AVX2-LABEL: icmp_v8i64_v8i1:
2350; AVX2:       # %bb.0:
2351; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm1
2352; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
2353; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
2354; AVX2-NEXT:    vptest %ymm0, %ymm0
2355; AVX2-NEXT:    sete %al
2356; AVX2-NEXT:    vzeroupper
2357; AVX2-NEXT:    retq
2358;
2359; AVX512-LABEL: icmp_v8i64_v8i1:
2360; AVX512:       # %bb.0:
2361; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
2362; AVX512-NEXT:    kortestw %k0, %k0
2363; AVX512-NEXT:    sete %al
2364; AVX512-NEXT:    vzeroupper
2365; AVX512-NEXT:    retq
2366  %a = icmp eq <8 x i64> %0, %1
2367  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
2368  ret i1 %b
2369}
2370
; 512-bit case: returns true only when all sixteen i32 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <16 x i1>
; result). The SSE2 expectations AND four pcmpeqd results before movmskps;
; the SSE4.1/AVX1/AVX2 runs OR the XORed pieces before (v)ptest; the AVX512
; runs use vpcmpneqd on zmm registers followed by kortestw/sete.
2371define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) nounwind {
2372; X86-SSE2-LABEL: icmp_v16i32_v16i1:
2373; X86-SSE2:       # %bb.0:
2374; X86-SSE2-NEXT:    pushl %ebp
2375; X86-SSE2-NEXT:    movl %esp, %ebp
2376; X86-SSE2-NEXT:    andl $-16, %esp
2377; X86-SSE2-NEXT:    subl $16, %esp
2378; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2379; X86-SSE2-NEXT:    pcmpeqd 72(%ebp), %xmm3
2380; X86-SSE2-NEXT:    pcmpeqd 40(%ebp), %xmm1
2381; X86-SSE2-NEXT:    pand %xmm3, %xmm1
2382; X86-SSE2-NEXT:    pcmpeqd 56(%ebp), %xmm2
2383; X86-SSE2-NEXT:    pcmpeqd 24(%ebp), %xmm0
2384; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2385; X86-SSE2-NEXT:    pand %xmm1, %xmm0
2386; X86-SSE2-NEXT:    movmskps %xmm0, %eax
2387; X86-SSE2-NEXT:    xorl $15, %eax
2388; X86-SSE2-NEXT:    sete %al
2389; X86-SSE2-NEXT:    movl %ebp, %esp
2390; X86-SSE2-NEXT:    popl %ebp
2391; X86-SSE2-NEXT:    retl
2392;
2393; X64-SSE2-LABEL: icmp_v16i32_v16i1:
2394; X64-SSE2:       # %bb.0:
2395; X64-SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
2396; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
2397; X64-SSE2-NEXT:    pand %xmm3, %xmm1
2398; X64-SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
2399; X64-SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
2400; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2401; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2402; X64-SSE2-NEXT:    movmskps %xmm0, %eax
2403; X64-SSE2-NEXT:    xorl $15, %eax
2404; X64-SSE2-NEXT:    sete %al
2405; X64-SSE2-NEXT:    retq
2406;
2407; SSE41-LABEL: icmp_v16i32_v16i1:
2408; SSE41:       # %bb.0:
2409; SSE41-NEXT:    pxor %xmm7, %xmm3
2410; SSE41-NEXT:    pxor %xmm5, %xmm1
2411; SSE41-NEXT:    por %xmm3, %xmm1
2412; SSE41-NEXT:    pxor %xmm6, %xmm2
2413; SSE41-NEXT:    pxor %xmm4, %xmm0
2414; SSE41-NEXT:    por %xmm2, %xmm0
2415; SSE41-NEXT:    por %xmm1, %xmm0
2416; SSE41-NEXT:    ptest %xmm0, %xmm0
2417; SSE41-NEXT:    sete %al
2418; SSE41-NEXT:    retq
2419;
2420; AVX1-LABEL: icmp_v16i32_v16i1:
2421; AVX1:       # %bb.0:
2422; AVX1-NEXT:    vxorps %ymm3, %ymm1, %ymm1
2423; AVX1-NEXT:    vxorps %ymm2, %ymm0, %ymm0
2424; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
2425; AVX1-NEXT:    vptest %ymm0, %ymm0
2426; AVX1-NEXT:    sete %al
2427; AVX1-NEXT:    vzeroupper
2428; AVX1-NEXT:    retq
2429;
2430; AVX2-LABEL: icmp_v16i32_v16i1:
2431; AVX2:       # %bb.0:
2432; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm1
2433; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
2434; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
2435; AVX2-NEXT:    vptest %ymm0, %ymm0
2436; AVX2-NEXT:    sete %al
2437; AVX2-NEXT:    vzeroupper
2438; AVX2-NEXT:    retq
2439;
2440; AVX512-LABEL: icmp_v16i32_v16i1:
2441; AVX512:       # %bb.0:
2442; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
2443; AVX512-NEXT:    kortestw %k0, %k0
2444; AVX512-NEXT:    sete %al
2445; AVX512-NEXT:    vzeroupper
2446; AVX512-NEXT:    retq
2447  %a = icmp eq <16 x i32> %0, %1
2448  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
2449  ret i1 %b
2450}
2451
; 512-bit case: returns true only when all thirty-two i16 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <32 x i1>
; result). The SSE2 expectations AND four pcmpeqb results before pmovmskb and
; XOR the mask with 0xFFFF; the SSE4.1/AVX1/AVX2 runs OR the XORed pieces
; before (v)ptest; the AVX512 runs use vpcmpneqd on zmm registers followed by
; kortestw/sete.
2452define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) nounwind {
2453; X86-SSE2-LABEL: icmp_v32i16_v32i1:
2454; X86-SSE2:       # %bb.0:
2455; X86-SSE2-NEXT:    pushl %ebp
2456; X86-SSE2-NEXT:    movl %esp, %ebp
2457; X86-SSE2-NEXT:    andl $-16, %esp
2458; X86-SSE2-NEXT:    subl $16, %esp
2459; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2460; X86-SSE2-NEXT:    pcmpeqb 72(%ebp), %xmm3
2461; X86-SSE2-NEXT:    pcmpeqb 40(%ebp), %xmm1
2462; X86-SSE2-NEXT:    pand %xmm3, %xmm1
2463; X86-SSE2-NEXT:    pcmpeqb 56(%ebp), %xmm2
2464; X86-SSE2-NEXT:    pcmpeqb 24(%ebp), %xmm0
2465; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2466; X86-SSE2-NEXT:    pand %xmm1, %xmm0
2467; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2468; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2469; X86-SSE2-NEXT:    sete %al
2470; X86-SSE2-NEXT:    movl %ebp, %esp
2471; X86-SSE2-NEXT:    popl %ebp
2472; X86-SSE2-NEXT:    retl
2473;
2474; X64-SSE2-LABEL: icmp_v32i16_v32i1:
2475; X64-SSE2:       # %bb.0:
2476; X64-SSE2-NEXT:    pcmpeqb %xmm7, %xmm3
2477; X64-SSE2-NEXT:    pcmpeqb %xmm5, %xmm1
2478; X64-SSE2-NEXT:    pand %xmm3, %xmm1
2479; X64-SSE2-NEXT:    pcmpeqb %xmm6, %xmm2
2480; X64-SSE2-NEXT:    pcmpeqb %xmm4, %xmm0
2481; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2482; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2483; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
2484; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2485; X64-SSE2-NEXT:    sete %al
2486; X64-SSE2-NEXT:    retq
2487;
2488; SSE41-LABEL: icmp_v32i16_v32i1:
2489; SSE41:       # %bb.0:
2490; SSE41-NEXT:    pxor %xmm7, %xmm3
2491; SSE41-NEXT:    pxor %xmm5, %xmm1
2492; SSE41-NEXT:    por %xmm3, %xmm1
2493; SSE41-NEXT:    pxor %xmm6, %xmm2
2494; SSE41-NEXT:    pxor %xmm4, %xmm0
2495; SSE41-NEXT:    por %xmm2, %xmm0
2496; SSE41-NEXT:    por %xmm1, %xmm0
2497; SSE41-NEXT:    ptest %xmm0, %xmm0
2498; SSE41-NEXT:    sete %al
2499; SSE41-NEXT:    retq
2500;
2501; AVX1-LABEL: icmp_v32i16_v32i1:
2502; AVX1:       # %bb.0:
2503; AVX1-NEXT:    vxorps %ymm3, %ymm1, %ymm1
2504; AVX1-NEXT:    vxorps %ymm2, %ymm0, %ymm0
2505; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
2506; AVX1-NEXT:    vptest %ymm0, %ymm0
2507; AVX1-NEXT:    sete %al
2508; AVX1-NEXT:    vzeroupper
2509; AVX1-NEXT:    retq
2510;
2511; AVX2-LABEL: icmp_v32i16_v32i1:
2512; AVX2:       # %bb.0:
2513; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm1
2514; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
2515; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
2516; AVX2-NEXT:    vptest %ymm0, %ymm0
2517; AVX2-NEXT:    sete %al
2518; AVX2-NEXT:    vzeroupper
2519; AVX2-NEXT:    retq
2520;
2521; AVX512-LABEL: icmp_v32i16_v32i1:
2522; AVX512:       # %bb.0:
2523; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
2524; AVX512-NEXT:    kortestw %k0, %k0
2525; AVX512-NEXT:    sete %al
2526; AVX512-NEXT:    vzeroupper
2527; AVX512-NEXT:    retq
2528  %a = icmp eq <32 x i16> %0, %1
2529  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
2530  ret i1 %b
2531}
2532
; 512-bit case: returns true only when all sixty-four i8 lanes of the two
; arguments are equal (icmp eq followed by an AND-reduction of the <64 x i1>
; result). The SSE2 expectations AND four pcmpeqb results before pmovmskb and
; XOR the mask with 0xFFFF; the SSE4.1/AVX1/AVX2 runs OR the XORed pieces
; before (v)ptest; the AVX512 runs use vpcmpneqd on zmm registers followed by
; kortestw/sete.
2533define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) nounwind {
2534; X86-SSE2-LABEL: icmp_v64i8_v64i1:
2535; X86-SSE2:       # %bb.0:
2536; X86-SSE2-NEXT:    pushl %ebp
2537; X86-SSE2-NEXT:    movl %esp, %ebp
2538; X86-SSE2-NEXT:    andl $-16, %esp
2539; X86-SSE2-NEXT:    subl $16, %esp
2540; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
2541; X86-SSE2-NEXT:    pcmpeqb 72(%ebp), %xmm3
2542; X86-SSE2-NEXT:    pcmpeqb 40(%ebp), %xmm1
2543; X86-SSE2-NEXT:    pand %xmm3, %xmm1
2544; X86-SSE2-NEXT:    pcmpeqb 56(%ebp), %xmm2
2545; X86-SSE2-NEXT:    pcmpeqb 24(%ebp), %xmm0
2546; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2547; X86-SSE2-NEXT:    pand %xmm1, %xmm0
2548; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
2549; X86-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2550; X86-SSE2-NEXT:    sete %al
2551; X86-SSE2-NEXT:    movl %ebp, %esp
2552; X86-SSE2-NEXT:    popl %ebp
2553; X86-SSE2-NEXT:    retl
2554;
2555; X64-SSE2-LABEL: icmp_v64i8_v64i1:
2556; X64-SSE2:       # %bb.0:
2557; X64-SSE2-NEXT:    pcmpeqb %xmm7, %xmm3
2558; X64-SSE2-NEXT:    pcmpeqb %xmm5, %xmm1
2559; X64-SSE2-NEXT:    pand %xmm3, %xmm1
2560; X64-SSE2-NEXT:    pcmpeqb %xmm6, %xmm2
2561; X64-SSE2-NEXT:    pcmpeqb %xmm4, %xmm0
2562; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2563; X64-SSE2-NEXT:    pand %xmm1, %xmm0
2564; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
2565; X64-SSE2-NEXT:    xorl $65535, %eax # imm = 0xFFFF
2566; X64-SSE2-NEXT:    sete %al
2567; X64-SSE2-NEXT:    retq
2568;
2569; SSE41-LABEL: icmp_v64i8_v64i1:
2570; SSE41:       # %bb.0:
2571; SSE41-NEXT:    pxor %xmm7, %xmm3
2572; SSE41-NEXT:    pxor %xmm5, %xmm1
2573; SSE41-NEXT:    por %xmm3, %xmm1
2574; SSE41-NEXT:    pxor %xmm6, %xmm2
2575; SSE41-NEXT:    pxor %xmm4, %xmm0
2576; SSE41-NEXT:    por %xmm2, %xmm0
2577; SSE41-NEXT:    por %xmm1, %xmm0
2578; SSE41-NEXT:    ptest %xmm0, %xmm0
2579; SSE41-NEXT:    sete %al
2580; SSE41-NEXT:    retq
2581;
2582; AVX1-LABEL: icmp_v64i8_v64i1:
2583; AVX1:       # %bb.0:
2584; AVX1-NEXT:    vxorps %ymm3, %ymm1, %ymm1
2585; AVX1-NEXT:    vxorps %ymm2, %ymm0, %ymm0
2586; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
2587; AVX1-NEXT:    vptest %ymm0, %ymm0
2588; AVX1-NEXT:    sete %al
2589; AVX1-NEXT:    vzeroupper
2590; AVX1-NEXT:    retq
2591;
2592; AVX2-LABEL: icmp_v64i8_v64i1:
2593; AVX2:       # %bb.0:
2594; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm1
2595; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
2596; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
2597; AVX2-NEXT:    vptest %ymm0, %ymm0
2598; AVX2-NEXT:    sete %al
2599; AVX2-NEXT:    vzeroupper
2600; AVX2-NEXT:    retq
2601;
2602; AVX512-LABEL: icmp_v64i8_v64i1:
2603; AVX512:       # %bb.0:
2604; AVX512-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
2605; AVX512-NEXT:    kortestw %k0, %k0
2606; AVX512-NEXT:    sete %al
2607; AVX512-NEXT:    vzeroupper
2608; AVX512-NEXT:    retq
2609  %a = icmp eq <64 x i8> %0, %1
2610  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
2611  ret i1 %b
2612}
2613
; Declarations of the AND-reduction intrinsics called by the tests above, one
; per <N x i1> input width (N = 2, 4, 8, 16, 32, 64); each returns a single i1.
2614declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
2615declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
2616declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
2617declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
2618declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
2619declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
2620;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2621; SSE: {{.*}}
2622; X64-SSE: {{.*}}
2623