xref: /llvm-project/llvm/test/CodeGen/X86/vector-compare-results.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
9
10;
11; 128-bit vector comparisons
12;
13
; test_cmp_v2f64: lane-wise ordered greater-than (fcmp ogt) of two <2 x double>
; arguments, returned as a <2 x i1> mask. The checks below expect the compare
; to be emitted as a less-than with the operands swapped (cmpltpd / vcmpltpd).
14define <2 x i1> @test_cmp_v2f64(<2 x double> %a0, <2 x double> %a1) nounwind {
15; SSE-LABEL: test_cmp_v2f64:
16; SSE:       # %bb.0:
17; SSE-NEXT:    cmpltpd %xmm0, %xmm1
18; SSE-NEXT:    movapd %xmm1, %xmm0
19; SSE-NEXT:    retq
20;
21; AVX-LABEL: test_cmp_v2f64:
22; AVX:       # %bb.0:
23; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
24; AVX-NEXT:    retq
25  %1 = fcmp ogt <2 x double> %a0, %a1
26  ret <2 x i1> %1
27}
28
; test_cmp_v4f32: lane-wise ordered greater-than (fcmp ogt) of two <4 x float>
; arguments, returned as a <4 x i1> mask. The checks below expect the compare
; to be emitted as a less-than with the operands swapped (cmpltps / vcmpltps).
29define <4 x i1> @test_cmp_v4f32(<4 x float> %a0, <4 x float> %a1) nounwind {
30; SSE-LABEL: test_cmp_v4f32:
31; SSE:       # %bb.0:
32; SSE-NEXT:    cmpltps %xmm0, %xmm1
33; SSE-NEXT:    movaps %xmm1, %xmm0
34; SSE-NEXT:    retq
35;
36; AVX-LABEL: test_cmp_v4f32:
37; AVX:       # %bb.0:
38; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
39; AVX-NEXT:    retq
40  %1 = fcmp ogt <4 x float> %a0, %a1
41  ret <4 x i1> %1
42}
43
; test_cmp_v2i64: lane-wise signed greater-than (icmp sgt) of two <2 x i64>
; arguments, returned as a <2 x i1> mask. The SSE2 checks expect a sequence
; built from 32-bit compares (pcmpgtd / pcmpeqd) combined with shuffles and
; logic ops; the SSE42 and AVX checks expect a single pcmpgtq / vpcmpgtq.
44define <2 x i1> @test_cmp_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
45; SSE2-LABEL: test_cmp_v2i64:
46; SSE2:       # %bb.0:
47; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
48; SSE2-NEXT:    pxor %xmm2, %xmm1
49; SSE2-NEXT:    pxor %xmm2, %xmm0
50; SSE2-NEXT:    movdqa %xmm0, %xmm2
51; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
52; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
53; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
54; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
55; SSE2-NEXT:    pand %xmm3, %xmm1
56; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
57; SSE2-NEXT:    por %xmm1, %xmm0
58; SSE2-NEXT:    retq
59;
60; SSE42-LABEL: test_cmp_v2i64:
61; SSE42:       # %bb.0:
62; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
63; SSE42-NEXT:    retq
64;
65; AVX-LABEL: test_cmp_v2i64:
66; AVX:       # %bb.0:
67; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
68; AVX-NEXT:    retq
69  %1 = icmp sgt <2 x i64> %a0, %a1
70  ret <2 x i1> %1
71}
72
; test_cmp_v4i32: lane-wise signed greater-than (icmp sgt) of two <4 x i32>
; arguments, returned as a <4 x i1> mask. Every run line is expected to emit a
; single pcmpgtd / vpcmpgtd.
73define <4 x i1> @test_cmp_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
74; SSE-LABEL: test_cmp_v4i32:
75; SSE:       # %bb.0:
76; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
77; SSE-NEXT:    retq
78;
79; AVX-LABEL: test_cmp_v4i32:
80; AVX:       # %bb.0:
81; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
82; AVX-NEXT:    retq
83  %1 = icmp sgt <4 x i32> %a0, %a1
84  ret <4 x i1> %1
85}
86
; test_cmp_v8i16: lane-wise signed greater-than (icmp sgt) of two <8 x i16>
; arguments, returned as an <8 x i1> mask. Every run line is expected to emit
; a single pcmpgtw / vpcmpgtw.
87define <8 x i1> @test_cmp_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind {
88; SSE-LABEL: test_cmp_v8i16:
89; SSE:       # %bb.0:
90; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
91; SSE-NEXT:    retq
92;
93; AVX-LABEL: test_cmp_v8i16:
94; AVX:       # %bb.0:
95; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
96; AVX-NEXT:    retq
97  %1 = icmp sgt <8 x i16> %a0, %a1
98  ret <8 x i1> %1
99}
100
; test_cmp_v16i8: lane-wise signed greater-than (icmp sgt) of two <16 x i8>
; arguments, returned as a <16 x i1> mask. Every run line is expected to emit
; a single pcmpgtb / vpcmpgtb.
101define <16 x i1> @test_cmp_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
102; SSE-LABEL: test_cmp_v16i8:
103; SSE:       # %bb.0:
104; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
105; SSE-NEXT:    retq
106;
107; AVX-LABEL: test_cmp_v16i8:
108; AVX:       # %bb.0:
109; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
110; AVX-NEXT:    retq
111  %1 = icmp sgt <16 x i8> %a0, %a1
112  ret <16 x i1> %1
113}
114
115;
116; 256-bit vector comparisons
117;
118
; test_cmp_v4f64: lane-wise ordered greater-than (fcmp ogt) of two
; <4 x double> arguments, returned as a <4 x i1> mask. Expected lowering per
; the checks below: the SSE runs compare each 128-bit half with cmpltpd and
; merge the halves with shufps; AVX1/AVX2 compare in one ymm vcmpltpd and then
; narrow with vextractf128 + vpackssdw; the AVX512 runs narrow with vpmovqd.
119define <4 x i1> @test_cmp_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
120; SSE-LABEL: test_cmp_v4f64:
121; SSE:       # %bb.0:
122; SSE-NEXT:    cmpltpd %xmm1, %xmm3
123; SSE-NEXT:    cmpltpd %xmm0, %xmm2
124; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
125; SSE-NEXT:    movaps %xmm2, %xmm0
126; SSE-NEXT:    retq
127;
128; AVX1-LABEL: test_cmp_v4f64:
129; AVX1:       # %bb.0:
130; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
131; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
132; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
133; AVX1-NEXT:    vzeroupper
134; AVX1-NEXT:    retq
135;
136; AVX2-LABEL: test_cmp_v4f64:
137; AVX2:       # %bb.0:
138; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
139; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
140; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
141; AVX2-NEXT:    vzeroupper
142; AVX2-NEXT:    retq
143;
144; AVX512-LABEL: test_cmp_v4f64:
145; AVX512:       # %bb.0:
146; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
147; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
148; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
149; AVX512-NEXT:    vzeroupper
150; AVX512-NEXT:    retq
151  %1 = fcmp ogt <4 x double> %a0, %a1
152  ret <4 x i1> %1
153}
154
; test_cmp_v8f32: lane-wise ordered greater-than (fcmp ogt) of two
; <8 x float> arguments, returned as an <8 x i1> mask. Expected lowering per
; the checks below: the SSE runs compare each 128-bit half with cmpltps and
; pack the halves with packssdw; AVX1/AVX2 compare in one ymm vcmpltps and
; then narrow with vextractf128 + vpackssdw; the AVX512 runs narrow with
; vpmovdw.
155define <8 x i1> @test_cmp_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
156; SSE-LABEL: test_cmp_v8f32:
157; SSE:       # %bb.0:
158; SSE-NEXT:    cmpltps %xmm1, %xmm3
159; SSE-NEXT:    cmpltps %xmm0, %xmm2
160; SSE-NEXT:    packssdw %xmm3, %xmm2
161; SSE-NEXT:    movdqa %xmm2, %xmm0
162; SSE-NEXT:    retq
163;
164; AVX1-LABEL: test_cmp_v8f32:
165; AVX1:       # %bb.0:
166; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
167; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
168; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
169; AVX1-NEXT:    vzeroupper
170; AVX1-NEXT:    retq
171;
172; AVX2-LABEL: test_cmp_v8f32:
173; AVX2:       # %bb.0:
174; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
175; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
176; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
177; AVX2-NEXT:    vzeroupper
178; AVX2-NEXT:    retq
179;
180; AVX512-LABEL: test_cmp_v8f32:
181; AVX512:       # %bb.0:
182; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
183; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
184; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
185; AVX512-NEXT:    vzeroupper
186; AVX512-NEXT:    retq
187  %1 = fcmp ogt <8 x float> %a0, %a1
188  ret <8 x i1> %1
189}
190
; test_cmp_v4i64: lane-wise signed greater-than (icmp sgt) of two <4 x i64>
; arguments, returned as a <4 x i1> mask. Expected lowering per the checks
; below: SSE2 builds the result from 32-bit compares (pcmpgtd / pcmpeqd) and
; shufps; SSE42 uses two pcmpgtq plus packssdw; AVX1 splits the inputs into
; 128-bit halves for vpcmpgtq; AVX2 and the AVX512 runs use one ymm vpcmpgtq
; followed by a narrowing step (vpackssdw or vpmovqd).
191define <4 x i1> @test_cmp_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
192; SSE2-LABEL: test_cmp_v4i64:
193; SSE2:       # %bb.0:
194; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
195; SSE2-NEXT:    pxor %xmm4, %xmm3
196; SSE2-NEXT:    pxor %xmm4, %xmm1
197; SSE2-NEXT:    movdqa %xmm1, %xmm5
198; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
199; SSE2-NEXT:    pxor %xmm4, %xmm2
200; SSE2-NEXT:    pxor %xmm4, %xmm0
201; SSE2-NEXT:    movdqa %xmm0, %xmm4
202; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
203; SSE2-NEXT:    movdqa %xmm4, %xmm6
204; SSE2-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2]
205; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
206; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
207; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
208; SSE2-NEXT:    andps %xmm6, %xmm0
209; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
210; SSE2-NEXT:    orps %xmm4, %xmm0
211; SSE2-NEXT:    retq
212;
213; SSE42-LABEL: test_cmp_v4i64:
214; SSE42:       # %bb.0:
215; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
216; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
217; SSE42-NEXT:    packssdw %xmm1, %xmm0
218; SSE42-NEXT:    retq
219;
220; AVX1-LABEL: test_cmp_v4i64:
221; AVX1:       # %bb.0:
222; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
223; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
224; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
225; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
226; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
227; AVX1-NEXT:    vzeroupper
228; AVX1-NEXT:    retq
229;
230; AVX2-LABEL: test_cmp_v4i64:
231; AVX2:       # %bb.0:
232; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
233; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
234; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
235; AVX2-NEXT:    vzeroupper
236; AVX2-NEXT:    retq
237;
238; AVX512-LABEL: test_cmp_v4i64:
239; AVX512:       # %bb.0:
240; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
241; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
242; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
243; AVX512-NEXT:    vzeroupper
244; AVX512-NEXT:    retq
245  %1 = icmp sgt <4 x i64> %a0, %a1
246  ret <4 x i1> %1
247}
248
; test_cmp_v8i32: lane-wise signed greater-than (icmp sgt) of two <8 x i32>
; arguments, returned as an <8 x i1> mask. Expected lowering per the checks
; below: the SSE runs use two pcmpgtd plus packssdw; AVX1 splits the inputs
; into 128-bit halves for vpcmpgtd; AVX2 and the AVX512 runs use one ymm
; vpcmpgtd followed by a narrowing step (vpackssdw or vpmovdw).
249define <8 x i1> @test_cmp_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
250; SSE-LABEL: test_cmp_v8i32:
251; SSE:       # %bb.0:
252; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
253; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
254; SSE-NEXT:    packssdw %xmm1, %xmm0
255; SSE-NEXT:    retq
256;
257; AVX1-LABEL: test_cmp_v8i32:
258; AVX1:       # %bb.0:
259; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
260; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
261; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
262; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
263; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
264; AVX1-NEXT:    vzeroupper
265; AVX1-NEXT:    retq
266;
267; AVX2-LABEL: test_cmp_v8i32:
268; AVX2:       # %bb.0:
269; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
270; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
271; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
272; AVX2-NEXT:    vzeroupper
273; AVX2-NEXT:    retq
274;
275; AVX512-LABEL: test_cmp_v8i32:
276; AVX512:       # %bb.0:
277; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
278; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
279; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
280; AVX512-NEXT:    vzeroupper
281; AVX512-NEXT:    retq
282  %1 = icmp sgt <8 x i32> %a0, %a1
283  ret <8 x i1> %1
284}
285
; test_cmp_v16i16: lane-wise signed greater-than (icmp sgt) of two <16 x i16>
; arguments, returned as a <16 x i1> mask. Expected lowering per the checks
; below: the SSE runs use two pcmpgtw plus packsswb; AVX1 splits the inputs
; into 128-bit halves; AVX2 uses one ymm vpcmpgtw plus vpacksswb. The three
; AVX512 runs have separate check blocks here: the F and DQ runs widen with
; vpmovzxwd and narrow with vpmovdb, while the BW run narrows with vpmovwb.
286define <16 x i1> @test_cmp_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind {
287; SSE-LABEL: test_cmp_v16i16:
288; SSE:       # %bb.0:
289; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
290; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
291; SSE-NEXT:    packsswb %xmm1, %xmm0
292; SSE-NEXT:    retq
293;
294; AVX1-LABEL: test_cmp_v16i16:
295; AVX1:       # %bb.0:
296; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
297; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
298; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
299; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
300; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
301; AVX1-NEXT:    vzeroupper
302; AVX1-NEXT:    retq
303;
304; AVX2-LABEL: test_cmp_v16i16:
305; AVX2:       # %bb.0:
306; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
307; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
308; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
309; AVX2-NEXT:    vzeroupper
310; AVX2-NEXT:    retq
311;
312; AVX512F-LABEL: test_cmp_v16i16:
313; AVX512F:       # %bb.0:
314; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
315; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
316; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
317; AVX512F-NEXT:    vzeroupper
318; AVX512F-NEXT:    retq
319;
320; AVX512DQ-LABEL: test_cmp_v16i16:
321; AVX512DQ:       # %bb.0:
322; AVX512DQ-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
323; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
324; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
325; AVX512DQ-NEXT:    vzeroupper
326; AVX512DQ-NEXT:    retq
327;
328; AVX512BW-LABEL: test_cmp_v16i16:
329; AVX512BW:       # %bb.0:
330; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
331; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
332; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
333; AVX512BW-NEXT:    vzeroupper
334; AVX512BW-NEXT:    retq
335  %1 = icmp sgt <16 x i16> %a0, %a1
336  ret <16 x i1> %1
337}
338
; test_cmp_v32i8: lane-wise signed greater-than (icmp sgt) of two <32 x i8>
; arguments, returned as a <32 x i1> mask. Expected lowering per the checks
; below: the SSE runs collect the two 16-lane results with pmovmskb, merge
; them into one 32-bit value and store it through %rdi (which is also copied
; to %rax); AVX1 compares the 128-bit halves and rejoins them with
; vinsertf128; AVX2 and the AVX512 runs use a single ymm vpcmpgtb.
339define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
340; SSE-LABEL: test_cmp_v32i8:
341; SSE:       # %bb.0:
342; SSE-NEXT:    movq %rdi, %rax
343; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
344; SSE-NEXT:    pmovmskb %xmm0, %ecx
345; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
346; SSE-NEXT:    pmovmskb %xmm1, %edx
347; SSE-NEXT:    shll $16, %edx
348; SSE-NEXT:    orl %ecx, %edx
349; SSE-NEXT:    movl %edx, (%rdi)
350; SSE-NEXT:    retq
351;
352; AVX1-LABEL: test_cmp_v32i8:
353; AVX1:       # %bb.0:
354; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
355; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
356; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
357; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
358; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
359; AVX1-NEXT:    retq
360;
361; AVX2-LABEL: test_cmp_v32i8:
362; AVX2:       # %bb.0:
363; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
364; AVX2-NEXT:    retq
365;
366; AVX512-LABEL: test_cmp_v32i8:
367; AVX512:       # %bb.0:
368; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
369; AVX512-NEXT:    retq
370  %1 = icmp sgt <32 x i8> %a0, %a1
371  ret <32 x i1> %1
372}
373
374;
375; 512-bit vector comparisons
376;
377
; test_cmp_v8f64: lane-wise ordered greater-than (fcmp ogt) of two
; <8 x double> arguments, returned as an <8 x i1> mask. Expected lowering per
; the checks below: the SSE runs use four cmpltpd and a tree of packssdw;
; AVX1/AVX2 use two ymm vcmpltpd followed by vpackssdw narrowing; the AVX512
; runs compare into a mask register (%k0 / %k1) with a zmm vcmpltpd and then
; expand the mask back into a vector (vpternlogd, vpmovm2d or vpmovm2w).
378define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
379; SSE-LABEL: test_cmp_v8f64:
380; SSE:       # %bb.0:
381; SSE-NEXT:    cmpltpd %xmm3, %xmm7
382; SSE-NEXT:    cmpltpd %xmm2, %xmm6
383; SSE-NEXT:    packssdw %xmm7, %xmm6
384; SSE-NEXT:    cmpltpd %xmm1, %xmm5
385; SSE-NEXT:    cmpltpd %xmm0, %xmm4
386; SSE-NEXT:    packssdw %xmm5, %xmm4
387; SSE-NEXT:    packssdw %xmm6, %xmm4
388; SSE-NEXT:    movdqa %xmm4, %xmm0
389; SSE-NEXT:    retq
390;
391; AVX1-LABEL: test_cmp_v8f64:
392; AVX1:       # %bb.0:
393; AVX1-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
394; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
395; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
396; AVX1-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
397; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
398; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
399; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
400; AVX1-NEXT:    vzeroupper
401; AVX1-NEXT:    retq
402;
403; AVX2-LABEL: test_cmp_v8f64:
404; AVX2:       # %bb.0:
405; AVX2-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
406; AVX2-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
407; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
408; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
409; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
410; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
411; AVX2-NEXT:    vzeroupper
412; AVX2-NEXT:    retq
413;
414; AVX512F-LABEL: test_cmp_v8f64:
415; AVX512F:       # %bb.0:
416; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
417; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
418; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
419; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
420; AVX512F-NEXT:    vzeroupper
421; AVX512F-NEXT:    retq
422;
423; AVX512DQ-LABEL: test_cmp_v8f64:
424; AVX512DQ:       # %bb.0:
425; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
426; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
427; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
428; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
429; AVX512DQ-NEXT:    vzeroupper
430; AVX512DQ-NEXT:    retq
431;
432; AVX512BW-LABEL: test_cmp_v8f64:
433; AVX512BW:       # %bb.0:
434; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
435; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
436; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
437; AVX512BW-NEXT:    vzeroupper
438; AVX512BW-NEXT:    retq
439  %1 = fcmp ogt <8 x double> %a0, %a1
440  ret <8 x i1> %1
441}
442
; test_cmp_v16f32: lane-wise ordered greater-than (fcmp ogt) of two
; <16 x float> arguments, returned as a <16 x i1> mask. Expected lowering per
; the checks below: the SSE runs use four cmpltps narrowed with packssdw and
; packsswb; AVX1/AVX2 use two ymm vcmpltps followed by pack-based narrowing;
; the AVX512 runs compare into a mask register with a zmm vcmpltps and then
; expand the mask back into a vector (vpternlogd, vpmovm2d or vpmovm2b).
443define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
444; SSE-LABEL: test_cmp_v16f32:
445; SSE:       # %bb.0:
446; SSE-NEXT:    cmpltps %xmm3, %xmm7
447; SSE-NEXT:    cmpltps %xmm2, %xmm6
448; SSE-NEXT:    packssdw %xmm7, %xmm6
449; SSE-NEXT:    cmpltps %xmm1, %xmm5
450; SSE-NEXT:    cmpltps %xmm0, %xmm4
451; SSE-NEXT:    packssdw %xmm5, %xmm4
452; SSE-NEXT:    packsswb %xmm6, %xmm4
453; SSE-NEXT:    movdqa %xmm4, %xmm0
454; SSE-NEXT:    retq
455;
456; AVX1-LABEL: test_cmp_v16f32:
457; AVX1:       # %bb.0:
458; AVX1-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
459; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
460; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
461; AVX1-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
462; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
463; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
464; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
465; AVX1-NEXT:    vzeroupper
466; AVX1-NEXT:    retq
467;
468; AVX2-LABEL: test_cmp_v16f32:
469; AVX2:       # %bb.0:
470; AVX2-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
471; AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
472; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
473; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
474; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
475; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
476; AVX2-NEXT:    vzeroupper
477; AVX2-NEXT:    retq
478;
479; AVX512F-LABEL: test_cmp_v16f32:
480; AVX512F:       # %bb.0:
481; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
482; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
483; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
484; AVX512F-NEXT:    vzeroupper
485; AVX512F-NEXT:    retq
486;
487; AVX512DQ-LABEL: test_cmp_v16f32:
488; AVX512DQ:       # %bb.0:
489; AVX512DQ-NEXT:    vcmpltps %zmm0, %zmm1, %k0
490; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
491; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
492; AVX512DQ-NEXT:    vzeroupper
493; AVX512DQ-NEXT:    retq
494;
495; AVX512BW-LABEL: test_cmp_v16f32:
496; AVX512BW:       # %bb.0:
497; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k0
498; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
499; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
500; AVX512BW-NEXT:    vzeroupper
501; AVX512BW-NEXT:    retq
502  %1 = fcmp ogt <16 x float> %a0, %a1
503  ret <16 x i1> %1
504}
505
; test_cmp_v8i64: lane-wise signed greater-than (icmp sgt) of two <8 x i64>
; arguments, returned as an <8 x i1> mask. Expected lowering per the checks
; below: SSE2 repeats a pcmpgtd / pcmpeqd / pshufd sequence for each of the
; four 128-bit pieces and packs the results; SSE42 uses four pcmpgtq plus
; packssdw; AVX1 works on 128-bit halves; AVX2 uses two ymm vpcmpgtq; the
; AVX512 runs compare into a mask register with a zmm vpcmpgtq and then expand
; the mask back into a vector.
506define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
507; SSE2-LABEL: test_cmp_v8i64:
508; SSE2:       # %bb.0:
509; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
510; SSE2-NEXT:    pxor %xmm8, %xmm7
511; SSE2-NEXT:    pxor %xmm8, %xmm3
512; SSE2-NEXT:    movdqa %xmm3, %xmm9
513; SSE2-NEXT:    pcmpgtd %xmm7, %xmm9
514; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
515; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
516; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
517; SSE2-NEXT:    pand %xmm10, %xmm3
518; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
519; SSE2-NEXT:    por %xmm3, %xmm7
520; SSE2-NEXT:    pxor %xmm8, %xmm6
521; SSE2-NEXT:    pxor %xmm8, %xmm2
522; SSE2-NEXT:    movdqa %xmm2, %xmm3
523; SSE2-NEXT:    pcmpgtd %xmm6, %xmm3
524; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm3[0,0,2,2]
525; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
526; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
527; SSE2-NEXT:    pand %xmm9, %xmm6
528; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
529; SSE2-NEXT:    por %xmm6, %xmm2
530; SSE2-NEXT:    packssdw %xmm7, %xmm2
531; SSE2-NEXT:    pxor %xmm8, %xmm5
532; SSE2-NEXT:    pxor %xmm8, %xmm1
533; SSE2-NEXT:    movdqa %xmm1, %xmm3
534; SSE2-NEXT:    pcmpgtd %xmm5, %xmm3
535; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
536; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
537; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
538; SSE2-NEXT:    pand %xmm6, %xmm1
539; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
540; SSE2-NEXT:    por %xmm1, %xmm3
541; SSE2-NEXT:    pxor %xmm8, %xmm4
542; SSE2-NEXT:    pxor %xmm8, %xmm0
543; SSE2-NEXT:    movdqa %xmm0, %xmm1
544; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
545; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
546; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
547; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
548; SSE2-NEXT:    pand %xmm5, %xmm4
549; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
550; SSE2-NEXT:    por %xmm4, %xmm0
551; SSE2-NEXT:    packssdw %xmm3, %xmm0
552; SSE2-NEXT:    packssdw %xmm2, %xmm0
553; SSE2-NEXT:    retq
554;
555; SSE42-LABEL: test_cmp_v8i64:
556; SSE42:       # %bb.0:
557; SSE42-NEXT:    pcmpgtq %xmm7, %xmm3
558; SSE42-NEXT:    pcmpgtq %xmm6, %xmm2
559; SSE42-NEXT:    packssdw %xmm3, %xmm2
560; SSE42-NEXT:    pcmpgtq %xmm5, %xmm1
561; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
562; SSE42-NEXT:    packssdw %xmm1, %xmm0
563; SSE42-NEXT:    packssdw %xmm2, %xmm0
564; SSE42-NEXT:    retq
565;
566; AVX1-LABEL: test_cmp_v8i64:
567; AVX1:       # %bb.0:
568; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
569; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
570; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
571; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
572; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
573; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
574; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
575; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
576; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
577; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
578; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
579; AVX1-NEXT:    vzeroupper
580; AVX1-NEXT:    retq
581;
582; AVX2-LABEL: test_cmp_v8i64:
583; AVX2:       # %bb.0:
584; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
585; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
586; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
587; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
588; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
589; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
590; AVX2-NEXT:    vzeroupper
591; AVX2-NEXT:    retq
592;
593; AVX512F-LABEL: test_cmp_v8i64:
594; AVX512F:       # %bb.0:
595; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
596; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
597; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
598; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
599; AVX512F-NEXT:    vzeroupper
600; AVX512F-NEXT:    retq
601;
602; AVX512DQ-LABEL: test_cmp_v8i64:
603; AVX512DQ:       # %bb.0:
604; AVX512DQ-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
605; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
606; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
607; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
608; AVX512DQ-NEXT:    vzeroupper
609; AVX512DQ-NEXT:    retq
610;
611; AVX512BW-LABEL: test_cmp_v8i64:
612; AVX512BW:       # %bb.0:
613; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
614; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
615; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
616; AVX512BW-NEXT:    vzeroupper
617; AVX512BW-NEXT:    retq
618  %1 = icmp sgt <8 x i64> %a0, %a1
619  ret <8 x i1> %1
620}
621
; test_cmp_v16i32: lane-wise signed greater-than (icmp sgt) of two <16 x i32>
; arguments, returned as a <16 x i1> mask. Expected lowering per the checks
; below: the SSE runs use four pcmpgtd narrowed with packssdw and packsswb;
; AVX1 works on 128-bit halves; AVX2 uses two ymm vpcmpgtd; the AVX512 runs
; compare into a mask register with a zmm vpcmpgtd and then expand the mask
; back into a vector.
622define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
623; SSE-LABEL: test_cmp_v16i32:
624; SSE:       # %bb.0:
625; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
626; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
627; SSE-NEXT:    packssdw %xmm3, %xmm2
628; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
629; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
630; SSE-NEXT:    packssdw %xmm1, %xmm0
631; SSE-NEXT:    packsswb %xmm2, %xmm0
632; SSE-NEXT:    retq
633;
634; AVX1-LABEL: test_cmp_v16i32:
635; AVX1:       # %bb.0:
636; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
637; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
638; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
639; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
640; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
641; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
642; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
643; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
644; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
645; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
646; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
647; AVX1-NEXT:    vzeroupper
648; AVX1-NEXT:    retq
649;
650; AVX2-LABEL: test_cmp_v16i32:
651; AVX2:       # %bb.0:
652; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
653; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
654; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
655; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
656; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
657; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
658; AVX2-NEXT:    vzeroupper
659; AVX2-NEXT:    retq
660;
661; AVX512F-LABEL: test_cmp_v16i32:
662; AVX512F:       # %bb.0:
663; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
664; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
665; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
666; AVX512F-NEXT:    vzeroupper
667; AVX512F-NEXT:    retq
668;
669; AVX512DQ-LABEL: test_cmp_v16i32:
670; AVX512DQ:       # %bb.0:
671; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
672; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
673; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
674; AVX512DQ-NEXT:    vzeroupper
675; AVX512DQ-NEXT:    retq
676;
677; AVX512BW-LABEL: test_cmp_v16i32:
678; AVX512BW:       # %bb.0:
679; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
680; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
681; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
682; AVX512BW-NEXT:    vzeroupper
683; AVX512BW-NEXT:    retq
684  %1 = icmp sgt <16 x i32> %a0, %a1
685  ret <16 x i1> %1
686}
687
; test_cmp_v32i16: lane-wise signed greater-than (icmp sgt) of two <32 x i16>
; arguments, returned as a <32 x i1> mask. Expected lowering per the checks
; below: the SSE runs pack the compare results with packsswb, collect them
; with pmovmskb and store one 32-bit value through %rdi (also copied to %rax);
; AVX1 rejoins two packed 128-bit results with vinsertf128; AVX2 and the
; AVX512 F/DQ runs use ymm vpcmpgtw plus vpacksswb and vpermq; the AVX512 BW
; run compares into %k0 with a zmm vpcmpgtw and expands it with vpmovm2b.
688define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind {
689; SSE-LABEL: test_cmp_v32i16:
690; SSE:       # %bb.0:
691; SSE-NEXT:    movq %rdi, %rax
692; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
693; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
694; SSE-NEXT:    packsswb %xmm1, %xmm0
695; SSE-NEXT:    pmovmskb %xmm0, %ecx
696; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
697; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
698; SSE-NEXT:    packsswb %xmm3, %xmm2
699; SSE-NEXT:    pmovmskb %xmm2, %edx
700; SSE-NEXT:    shll $16, %edx
701; SSE-NEXT:    orl %ecx, %edx
702; SSE-NEXT:    movl %edx, (%rdi)
703; SSE-NEXT:    retq
704;
705; AVX1-LABEL: test_cmp_v32i16:
706; AVX1:       # %bb.0:
707; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
708; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
709; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
710; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
711; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
712; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
713; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
714; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
715; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
716; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
717; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
718; AVX1-NEXT:    retq
719;
720; AVX2-LABEL: test_cmp_v32i16:
721; AVX2:       # %bb.0:
722; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
723; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
724; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
725; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
726; AVX2-NEXT:    retq
727;
728; AVX512F-LABEL: test_cmp_v32i16:
729; AVX512F:       # %bb.0:
730; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
731; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
732; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
733; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
734; AVX512F-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
735; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
736; AVX512F-NEXT:    retq
737;
738; AVX512DQ-LABEL: test_cmp_v32i16:
739; AVX512DQ:       # %bb.0:
740; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
741; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
742; AVX512DQ-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
743; AVX512DQ-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
744; AVX512DQ-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
745; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
746; AVX512DQ-NEXT:    retq
747;
748; AVX512BW-LABEL: test_cmp_v32i16:
749; AVX512BW:       # %bb.0:
750; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
751; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
752; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
753; AVX512BW-NEXT:    retq
754  %1 = icmp sgt <32 x i16> %a0, %a1
755  ret <32 x i1> %1
756}
757
; test_cmp_v64i8: lane-wise signed greater-than (icmp sgt) of two <64 x i8>
; arguments, returned as a <64 x i1> mask. Expected lowering per the checks
; below: the SSE, AVX1 and AVX2 runs collect the compare results with
; (v)pmovmskb, merge them into one 64-bit value and store it through %rdi
; (also copied to %rax); the AVX512 F/DQ runs produce four 16-bit masks
; (%k0-%k3) and store them with kmovw at offsets 0, 2, 4 and 6 from %rdi; the
; AVX512 BW run compares into %k0 with a zmm vpcmpgtb and expands it with
; vpmovm2b.
758define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
759; SSE-LABEL: test_cmp_v64i8:
760; SSE:       # %bb.0:
761; SSE-NEXT:    movq %rdi, %rax
762; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
763; SSE-NEXT:    pmovmskb %xmm0, %ecx
764; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
765; SSE-NEXT:    pmovmskb %xmm1, %edx
766; SSE-NEXT:    shll $16, %edx
767; SSE-NEXT:    orl %ecx, %edx
768; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
769; SSE-NEXT:    pmovmskb %xmm2, %ecx
770; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
771; SSE-NEXT:    pmovmskb %xmm3, %esi
772; SSE-NEXT:    shll $16, %esi
773; SSE-NEXT:    orl %ecx, %esi
774; SSE-NEXT:    shlq $32, %rsi
775; SSE-NEXT:    orq %rdx, %rsi
776; SSE-NEXT:    movq %rsi, (%rdi)
777; SSE-NEXT:    retq
778;
779; AVX1-LABEL: test_cmp_v64i8:
780; AVX1:       # %bb.0:
781; AVX1-NEXT:    movq %rdi, %rax
782; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm4
783; AVX1-NEXT:    vpmovmskb %xmm4, %ecx
784; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
785; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
786; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
787; AVX1-NEXT:    vpmovmskb %xmm0, %edx
788; AVX1-NEXT:    shll $16, %edx
789; AVX1-NEXT:    orl %ecx, %edx
790; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm0
791; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
792; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
793; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
794; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
795; AVX1-NEXT:    vpmovmskb %xmm0, %esi
796; AVX1-NEXT:    shll $16, %esi
797; AVX1-NEXT:    orl %ecx, %esi
798; AVX1-NEXT:    shlq $32, %rsi
799; AVX1-NEXT:    orq %rdx, %rsi
800; AVX1-NEXT:    movq %rsi, (%rdi)
801; AVX1-NEXT:    vzeroupper
802; AVX1-NEXT:    retq
803;
804; AVX2-LABEL: test_cmp_v64i8:
805; AVX2:       # %bb.0:
806; AVX2-NEXT:    movq %rdi, %rax
807; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
808; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
809; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm0
810; AVX2-NEXT:    vpmovmskb %ymm0, %edx
811; AVX2-NEXT:    shlq $32, %rdx
812; AVX2-NEXT:    orq %rcx, %rdx
813; AVX2-NEXT:    movq %rdx, (%rdi)
814; AVX2-NEXT:    vzeroupper
815; AVX2-NEXT:    retq
816;
817; AVX512F-LABEL: test_cmp_v64i8:
818; AVX512F:       # %bb.0:
819; AVX512F-NEXT:    movq %rdi, %rax
820; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm2
821; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm3
822; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
823; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
824; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
825; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
826; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
827; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
828; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
829; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
830; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
831; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
832; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
833; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k3
834; AVX512F-NEXT:    kmovw %k3, 6(%rdi)
835; AVX512F-NEXT:    kmovw %k2, 4(%rdi)
836; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
837; AVX512F-NEXT:    kmovw %k0, (%rdi)
838; AVX512F-NEXT:    vzeroupper
839; AVX512F-NEXT:    retq
840;
841; AVX512DQ-LABEL: test_cmp_v64i8:
842; AVX512DQ:       # %bb.0:
843; AVX512DQ-NEXT:    movq %rdi, %rax
844; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm2
845; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm3
846; AVX512DQ-NEXT:    vpmovd2m %zmm3, %k0
847; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm2
848; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm2
849; AVX512DQ-NEXT:    vpmovd2m %zmm2, %k1
850; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
851; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
852; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
853; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
854; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k2
855; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
856; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
857; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
858; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
859; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
860; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
861; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
862; AVX512DQ-NEXT:    vzeroupper
863; AVX512DQ-NEXT:    retq
864;
865; AVX512BW-LABEL: test_cmp_v64i8:
866; AVX512BW:       # %bb.0:
867; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
868; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
869; AVX512BW-NEXT:    retq
870  %1 = icmp sgt <64 x i8> %a0, %a1
871  ret <64 x i1> %1
872}
873
874;
875; 1024-bit vector comparisons
876;
877
; Ordered greater-than (fcmp ogt) of two <16 x double> vectors, returning the
; <16 x i1> result. Every run line expects the compare to be emitted as a
; less-than (cmpltpd / vcmpltpd), presumably with the operands swapped, and
; expects the sixteen results to end up in xmm0. Without AVX-512 that is done
; with packssdw/packsswb chains; the AVX-512 runs expect two k-register
; compares joined by kunpckbw and then expanded back into a vector.
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
878define <16 x i1> @test_cmp_v16f64(<16 x double> %a0, <16 x double> %a1) nounwind {
879; SSE-LABEL: test_cmp_v16f64:
880; SSE:       # %bb.0:
881; SSE-NEXT:    movapd %xmm0, %xmm8
882; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm0
883; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
884; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
885; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm12
886; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
887; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm13
888; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm14
889; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm15
890; SSE-NEXT:    cmpltpd %xmm7, %xmm15
891; SSE-NEXT:    cmpltpd %xmm6, %xmm14
892; SSE-NEXT:    packssdw %xmm15, %xmm14
893; SSE-NEXT:    cmpltpd %xmm5, %xmm13
894; SSE-NEXT:    cmpltpd %xmm4, %xmm9
895; SSE-NEXT:    packssdw %xmm13, %xmm9
896; SSE-NEXT:    packssdw %xmm14, %xmm9
897; SSE-NEXT:    cmpltpd %xmm3, %xmm12
898; SSE-NEXT:    cmpltpd %xmm2, %xmm10
899; SSE-NEXT:    packssdw %xmm12, %xmm10
900; SSE-NEXT:    cmpltpd %xmm1, %xmm11
901; SSE-NEXT:    cmpltpd %xmm8, %xmm0
902; SSE-NEXT:    packssdw %xmm11, %xmm0
903; SSE-NEXT:    packssdw %xmm10, %xmm0
904; SSE-NEXT:    packsswb %xmm9, %xmm0
905; SSE-NEXT:    retq
906;
907; AVX1-LABEL: test_cmp_v16f64:
908; AVX1:       # %bb.0:
909; AVX1-NEXT:    vcmpltpd %ymm3, %ymm7, %ymm3
910; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
911; AVX1-NEXT:    vpackssdw %xmm7, %xmm3, %xmm3
912; AVX1-NEXT:    vcmpltpd %ymm2, %ymm6, %ymm2
913; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
914; AVX1-NEXT:    vpackssdw %xmm6, %xmm2, %xmm2
915; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
916; AVX1-NEXT:    vcmpltpd %ymm1, %ymm5, %ymm1
917; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
918; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
919; AVX1-NEXT:    vcmpltpd %ymm0, %ymm4, %ymm0
920; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
921; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
922; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
923; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
924; AVX1-NEXT:    vzeroupper
925; AVX1-NEXT:    retq
926;
927; AVX2-LABEL: test_cmp_v16f64:
928; AVX2:       # %bb.0:
929; AVX2-NEXT:    vcmpltpd %ymm3, %ymm7, %ymm3
930; AVX2-NEXT:    vcmpltpd %ymm2, %ymm6, %ymm2
931; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
932; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
933; AVX2-NEXT:    vcmpltpd %ymm1, %ymm5, %ymm1
934; AVX2-NEXT:    vcmpltpd %ymm0, %ymm4, %ymm0
935; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
936; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
937; AVX2-NEXT:    vpackssdw %ymm2, %ymm0, %ymm0
938; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
939; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
940; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
941; AVX2-NEXT:    vzeroupper
942; AVX2-NEXT:    retq
943;
944; AVX512F-LABEL: test_cmp_v16f64:
945; AVX512F:       # %bb.0:
946; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
947; AVX512F-NEXT:    vcmpltpd %zmm1, %zmm3, %k1
948; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
949; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
950; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
951; AVX512F-NEXT:    vzeroupper
952; AVX512F-NEXT:    retq
953;
954; AVX512DQ-LABEL: test_cmp_v16f64:
955; AVX512DQ:       # %bb.0:
956; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
957; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm3, %k1
958; AVX512DQ-NEXT:    kunpckbw %k0, %k1, %k0
959; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
960; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
961; AVX512DQ-NEXT:    vzeroupper
962; AVX512DQ-NEXT:    retq
963;
964; AVX512BW-LABEL: test_cmp_v16f64:
965; AVX512BW:       # %bb.0:
966; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
967; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm3, %k1
968; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0
969; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
970; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
971; AVX512BW-NEXT:    vzeroupper
972; AVX512BW-NEXT:    retq
973  %1 = fcmp ogt <16 x double> %a0, %a1
974  ret <16 x i1> %1
975}
976
; Ordered greater-than (fcmp ogt) of two <32 x float> vectors, returning a
; <32 x i1> result. As with the other floating-point tests in this file, the
; compare is expected to be emitted as cmpltps / vcmpltps.
; The plain SSE expectation builds two 16-bit pmovmskb masks, merges them with
; shll $16 / orl, stores the 32-bit value through %rdi and copies %rdi to
; %rax. All of the AVX-level runs instead expect the result to be left in
; ymm0 (the AVX512BW run joins two 16-bit k-masks with kunpckwd first).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
977define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind {
978; SSE-LABEL: test_cmp_v32f32:
979; SSE:       # %bb.0:
980; SSE-NEXT:    movq %rdi, %rax
981; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
982; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
983; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
984; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
985; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm12
986; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm13
987; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm14
988; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm15
989; SSE-NEXT:    cmpltps %xmm3, %xmm15
990; SSE-NEXT:    cmpltps %xmm2, %xmm14
991; SSE-NEXT:    packssdw %xmm15, %xmm14
992; SSE-NEXT:    cmpltps %xmm1, %xmm13
993; SSE-NEXT:    cmpltps %xmm0, %xmm12
994; SSE-NEXT:    packssdw %xmm13, %xmm12
995; SSE-NEXT:    packsswb %xmm14, %xmm12
996; SSE-NEXT:    pmovmskb %xmm12, %ecx
997; SSE-NEXT:    cmpltps %xmm7, %xmm11
998; SSE-NEXT:    cmpltps %xmm6, %xmm9
999; SSE-NEXT:    packssdw %xmm11, %xmm9
1000; SSE-NEXT:    cmpltps %xmm5, %xmm10
1001; SSE-NEXT:    cmpltps %xmm4, %xmm8
1002; SSE-NEXT:    packssdw %xmm10, %xmm8
1003; SSE-NEXT:    packsswb %xmm9, %xmm8
1004; SSE-NEXT:    pmovmskb %xmm8, %edx
1005; SSE-NEXT:    shll $16, %edx
1006; SSE-NEXT:    orl %ecx, %edx
1007; SSE-NEXT:    movl %edx, (%rdi)
1008; SSE-NEXT:    retq
1009;
1010; AVX1-LABEL: test_cmp_v32f32:
1011; AVX1:       # %bb.0:
1012; AVX1-NEXT:    vcmpltps %ymm3, %ymm7, %ymm3
1013; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
1014; AVX1-NEXT:    vpackssdw %xmm7, %xmm3, %xmm3
1015; AVX1-NEXT:    vcmpltps %ymm2, %ymm6, %ymm2
1016; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
1017; AVX1-NEXT:    vpackssdw %xmm6, %xmm2, %xmm2
1018; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
1019; AVX1-NEXT:    vcmpltps %ymm1, %ymm5, %ymm1
1020; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1021; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
1022; AVX1-NEXT:    vcmpltps %ymm0, %ymm4, %ymm0
1023; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1024; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
1025; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1026; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1027; AVX1-NEXT:    retq
1028;
1029; AVX2-LABEL: test_cmp_v32f32:
1030; AVX2:       # %bb.0:
1031; AVX2-NEXT:    vcmpltps %ymm3, %ymm7, %ymm3
1032; AVX2-NEXT:    vcmpltps %ymm2, %ymm6, %ymm2
1033; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
1034; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
1035; AVX2-NEXT:    vcmpltps %ymm1, %ymm5, %ymm1
1036; AVX2-NEXT:    vcmpltps %ymm0, %ymm4, %ymm0
1037; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1038; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1039; AVX2-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
1040; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1041; AVX2-NEXT:    retq
1042;
1043; AVX512F-LABEL: test_cmp_v32f32:
1044; AVX512F:       # %bb.0:
1045; AVX512F-NEXT:    vcmpltps %zmm1, %zmm3, %k1
1046; AVX512F-NEXT:    vcmpltps %zmm0, %zmm2, %k2
1047; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1048; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1049; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1050; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
1051; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1052; AVX512F-NEXT:    retq
1053;
1054; AVX512DQ-LABEL: test_cmp_v32f32:
1055; AVX512DQ:       # %bb.0:
1056; AVX512DQ-NEXT:    vcmpltps %zmm1, %zmm3, %k0
1057; AVX512DQ-NEXT:    vcmpltps %zmm0, %zmm2, %k1
1058; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm0
1059; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1060; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm1
1061; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
1062; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1063; AVX512DQ-NEXT:    retq
1064;
1065; AVX512BW-LABEL: test_cmp_v32f32:
1066; AVX512BW:       # %bb.0:
1067; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm2, %k0
1068; AVX512BW-NEXT:    vcmpltps %zmm1, %zmm3, %k1
1069; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
1070; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1071; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1072; AVX512BW-NEXT:    retq
1073  %1 = fcmp ogt <32 x float> %a0, %a1
1074  ret <32 x i1> %1
1075}
1076
; Signed greater-than (icmp sgt) of two <16 x i64> vectors, returning the
; <16 x i1> result, which every run line expects to end up in xmm0.
; Unlike the floating-point tests, the two SSE levels are checked separately
; here. The SSE4.2 run expects pcmpgtq to be used directly, while the SSE2 run
; expects each 64-bit compare to be assembled from a pxor with a
; [2147483648,2147483648] constant followed by pcmpgtd / pcmpeqd / pshufd /
; pand / por. The AVX-512 runs expect two vpcmpgtq k-mask compares joined
; with kunpckbw.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1077define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind {
1078; SSE2-LABEL: test_cmp_v16i64:
1079; SSE2:       # %bb.0:
1080; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
1081; SSE2-NEXT:    pxor %xmm8, %xmm7
1082; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
1083; SSE2-NEXT:    pxor %xmm8, %xmm9
1084; SSE2-NEXT:    movdqa %xmm7, %xmm10
1085; SSE2-NEXT:    pcmpgtd %xmm9, %xmm10
1086; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1087; SSE2-NEXT:    pcmpeqd %xmm7, %xmm9
1088; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
1089; SSE2-NEXT:    pand %xmm11, %xmm7
1090; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
1091; SSE2-NEXT:    por %xmm7, %xmm9
1092; SSE2-NEXT:    pxor %xmm8, %xmm6
1093; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
1094; SSE2-NEXT:    pxor %xmm8, %xmm7
1095; SSE2-NEXT:    movdqa %xmm6, %xmm10
1096; SSE2-NEXT:    pcmpgtd %xmm7, %xmm10
1097; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1098; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
1099; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1100; SSE2-NEXT:    pand %xmm11, %xmm7
1101; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm10[1,1,3,3]
1102; SSE2-NEXT:    por %xmm7, %xmm6
1103; SSE2-NEXT:    packssdw %xmm9, %xmm6
1104; SSE2-NEXT:    pxor %xmm8, %xmm5
1105; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
1106; SSE2-NEXT:    pxor %xmm8, %xmm7
1107; SSE2-NEXT:    movdqa %xmm5, %xmm9
1108; SSE2-NEXT:    pcmpgtd %xmm7, %xmm9
1109; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
1110; SSE2-NEXT:    pcmpeqd %xmm5, %xmm7
1111; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1112; SSE2-NEXT:    pand %xmm10, %xmm5
1113; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
1114; SSE2-NEXT:    por %xmm5, %xmm7
1115; SSE2-NEXT:    pxor %xmm8, %xmm4
1116; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
1117; SSE2-NEXT:    pxor %xmm8, %xmm5
1118; SSE2-NEXT:    movdqa %xmm4, %xmm9
1119; SSE2-NEXT:    pcmpgtd %xmm5, %xmm9
1120; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
1121; SSE2-NEXT:    pcmpeqd %xmm4, %xmm5
1122; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1123; SSE2-NEXT:    pand %xmm10, %xmm5
1124; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3]
1125; SSE2-NEXT:    por %xmm5, %xmm4
1126; SSE2-NEXT:    packssdw %xmm7, %xmm4
1127; SSE2-NEXT:    packssdw %xmm6, %xmm4
1128; SSE2-NEXT:    pxor %xmm8, %xmm3
1129; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
1130; SSE2-NEXT:    pxor %xmm8, %xmm5
1131; SSE2-NEXT:    movdqa %xmm3, %xmm6
1132; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1133; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1134; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
1135; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
1136; SSE2-NEXT:    pand %xmm7, %xmm3
1137; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1138; SSE2-NEXT:    por %xmm3, %xmm5
1139; SSE2-NEXT:    pxor %xmm8, %xmm2
1140; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
1141; SSE2-NEXT:    pxor %xmm8, %xmm3
1142; SSE2-NEXT:    movdqa %xmm2, %xmm6
1143; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
1144; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1145; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
1146; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1147; SSE2-NEXT:    pand %xmm7, %xmm3
1148; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
1149; SSE2-NEXT:    por %xmm3, %xmm2
1150; SSE2-NEXT:    packssdw %xmm5, %xmm2
1151; SSE2-NEXT:    pxor %xmm8, %xmm1
1152; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
1153; SSE2-NEXT:    pxor %xmm8, %xmm3
1154; SSE2-NEXT:    movdqa %xmm1, %xmm5
1155; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
1156; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1157; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
1158; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1159; SSE2-NEXT:    pand %xmm6, %xmm1
1160; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
1161; SSE2-NEXT:    por %xmm1, %xmm3
1162; SSE2-NEXT:    pxor %xmm8, %xmm0
1163; SSE2-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm8
1164; SSE2-NEXT:    movdqa %xmm0, %xmm1
1165; SSE2-NEXT:    pcmpgtd %xmm8, %xmm1
1166; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1167; SSE2-NEXT:    pcmpeqd %xmm0, %xmm8
1168; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm8[1,1,3,3]
1169; SSE2-NEXT:    pand %xmm5, %xmm6
1170; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1171; SSE2-NEXT:    por %xmm6, %xmm0
1172; SSE2-NEXT:    packssdw %xmm3, %xmm0
1173; SSE2-NEXT:    packssdw %xmm2, %xmm0
1174; SSE2-NEXT:    packsswb %xmm4, %xmm0
1175; SSE2-NEXT:    retq
1176;
1177; SSE42-LABEL: test_cmp_v16i64:
1178; SSE42:       # %bb.0:
1179; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm7
1180; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm6
1181; SSE42-NEXT:    packssdw %xmm7, %xmm6
1182; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm5
1183; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm4
1184; SSE42-NEXT:    packssdw %xmm5, %xmm4
1185; SSE42-NEXT:    packssdw %xmm6, %xmm4
1186; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm3
1187; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm2
1188; SSE42-NEXT:    packssdw %xmm3, %xmm2
1189; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm1
1190; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm0
1191; SSE42-NEXT:    packssdw %xmm1, %xmm0
1192; SSE42-NEXT:    packssdw %xmm2, %xmm0
1193; SSE42-NEXT:    packsswb %xmm4, %xmm0
1194; SSE42-NEXT:    retq
1195;
1196; AVX1-LABEL: test_cmp_v16i64:
1197; AVX1:       # %bb.0:
1198; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
1199; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm9
1200; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
1201; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm3, %xmm3
1202; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm3
1203; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
1204; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm8
1205; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm8, %xmm7
1206; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm2, %xmm2
1207; AVX1-NEXT:    vpackssdw %xmm7, %xmm2, %xmm2
1208; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
1209; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
1210; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
1211; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm6, %xmm3
1212; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm1
1213; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
1214; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
1215; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
1216; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
1217; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm0
1218; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
1219; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1220; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
1221; AVX1-NEXT:    vzeroupper
1222; AVX1-NEXT:    retq
1223;
1224; AVX2-LABEL: test_cmp_v16i64:
1225; AVX2:       # %bb.0:
1226; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm3, %ymm3
1227; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm2, %ymm2
1228; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
1229; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
1230; AVX2-NEXT:    vpcmpgtq %ymm5, %ymm1, %ymm1
1231; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm0, %ymm0
1232; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1233; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1234; AVX2-NEXT:    vpackssdw %ymm2, %ymm0, %ymm0
1235; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1236; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1237; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1238; AVX2-NEXT:    vzeroupper
1239; AVX2-NEXT:    retq
1240;
1241; AVX512F-LABEL: test_cmp_v16i64:
1242; AVX512F:       # %bb.0:
1243; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm0, %k0
1244; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm1, %k1
1245; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
1246; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1247; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1248; AVX512F-NEXT:    vzeroupper
1249; AVX512F-NEXT:    retq
1250;
1251; AVX512DQ-LABEL: test_cmp_v16i64:
1252; AVX512DQ:       # %bb.0:
1253; AVX512DQ-NEXT:    vpcmpgtq %zmm2, %zmm0, %k0
1254; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm1, %k1
1255; AVX512DQ-NEXT:    kunpckbw %k0, %k1, %k0
1256; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1257; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1258; AVX512DQ-NEXT:    vzeroupper
1259; AVX512DQ-NEXT:    retq
1260;
1261; AVX512BW-LABEL: test_cmp_v16i64:
1262; AVX512BW:       # %bb.0:
1263; AVX512BW-NEXT:    vpcmpgtq %zmm2, %zmm0, %k0
1264; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm1, %k1
1265; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0
1266; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1267; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1268; AVX512BW-NEXT:    vzeroupper
1269; AVX512BW-NEXT:    retq
1270  %1 = icmp sgt <16 x i64> %a0, %a1
1271  ret <16 x i1> %1
1272}
1273
; Signed greater-than (icmp sgt) of two <32 x i32> vectors, returning a
; <32 x i1> result, checked through pcmpgtd / vpcmpgtd on every run line.
; The plain SSE expectation narrows the compares with packssdw/packsswb,
; extracts two 16-bit pmovmskb masks, merges them with shll $16 / orl, stores
; the 32-bit value through %rdi and copies %rdi to %rax. All of the AVX-level
; runs instead expect the result to be left in ymm0 (the AVX512BW run joins
; two 16-bit k-masks with kunpckwd first).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1274define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind {
1275; SSE-LABEL: test_cmp_v32i32:
1276; SSE:       # %bb.0:
1277; SSE-NEXT:    movq %rdi, %rax
1278; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm3
1279; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm2
1280; SSE-NEXT:    packssdw %xmm3, %xmm2
1281; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm1
1282; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm0
1283; SSE-NEXT:    packssdw %xmm1, %xmm0
1284; SSE-NEXT:    packsswb %xmm2, %xmm0
1285; SSE-NEXT:    pmovmskb %xmm0, %ecx
1286; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm7
1287; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm6
1288; SSE-NEXT:    packssdw %xmm7, %xmm6
1289; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm5
1290; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm4
1291; SSE-NEXT:    packssdw %xmm5, %xmm4
1292; SSE-NEXT:    packsswb %xmm6, %xmm4
1293; SSE-NEXT:    pmovmskb %xmm4, %edx
1294; SSE-NEXT:    shll $16, %edx
1295; SSE-NEXT:    orl %ecx, %edx
1296; SSE-NEXT:    movl %edx, (%rdi)
1297; SSE-NEXT:    retq
1298;
1299; AVX1-LABEL: test_cmp_v32i32:
1300; AVX1:       # %bb.0:
1301; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
1302; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm9
1303; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
1304; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm3, %xmm3
1305; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm3
1306; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
1307; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm8
1308; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm8, %xmm7
1309; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm2, %xmm2
1310; AVX1-NEXT:    vpackssdw %xmm7, %xmm2, %xmm2
1311; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
1312; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
1313; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
1314; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
1315; AVX1-NEXT:    vpcmpgtd %xmm5, %xmm1, %xmm1
1316; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
1317; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
1318; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
1319; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm3
1320; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm0, %xmm0
1321; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
1322; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
1323; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1324; AVX1-NEXT:    retq
1325;
1326; AVX2-LABEL: test_cmp_v32i32:
1327; AVX2:       # %bb.0:
1328; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm3, %ymm3
1329; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm2, %ymm2
1330; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
1331; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
1332; AVX2-NEXT:    vpcmpgtd %ymm5, %ymm1, %ymm1
1333; AVX2-NEXT:    vpcmpgtd %ymm4, %ymm0, %ymm0
1334; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1335; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1336; AVX2-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
1337; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1338; AVX2-NEXT:    retq
1339;
1340; AVX512F-LABEL: test_cmp_v32i32:
1341; AVX512F:       # %bb.0:
1342; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm1, %k1
1343; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm0, %k2
1344; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1345; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1346; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1347; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
1348; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1349; AVX512F-NEXT:    retq
1350;
1351; AVX512DQ-LABEL: test_cmp_v32i32:
1352; AVX512DQ:       # %bb.0:
1353; AVX512DQ-NEXT:    vpcmpgtd %zmm3, %zmm1, %k0
1354; AVX512DQ-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
1355; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm0
1356; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1357; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm1
1358; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
1359; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1360; AVX512DQ-NEXT:    retq
1361;
1362; AVX512BW-LABEL: test_cmp_v32i32:
1363; AVX512BW:       # %bb.0:
1364; AVX512BW-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
1365; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm1, %k1
1366; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
1367; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1368; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1369; AVX512BW-NEXT:    retq
1370  %1 = icmp sgt <32 x i32> %a0, %a1
1371  ret <32 x i1> %1
1372}
1373
; Signed greater-than (icmp sgt) of two <64 x i16> vectors, returning a
; <64 x i1> result, checked through pcmpgtw / vpcmpgtw on every run line.
; Every run line except the AVX512BW one expects the 64 result bits to be
; written through %rdi, with %rdi copied to %rax. Plain SSE, AVX1 and AVX2
; assemble one 64-bit value from pmovmskb masks (shll/shlq + or) and store it
; with a single movq; the AVX512F and AVX512DQ runs store four 16-bit k-masks
; with kmovw at offsets 0, 2, 4 and 6. The AVX512BW run instead joins two
; 32-bit k-masks with kunpckdq and expands them into zmm0 with vpmovm2b, with
; no store expected.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1374define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
1375; SSE-LABEL: test_cmp_v64i16:
1376; SSE:       # %bb.0:
1377; SSE-NEXT:    movq %rdi, %rax
1378; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm1
1379; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm0
1380; SSE-NEXT:    packsswb %xmm1, %xmm0
1381; SSE-NEXT:    pmovmskb %xmm0, %ecx
1382; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm3
1383; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm2
1384; SSE-NEXT:    packsswb %xmm3, %xmm2
1385; SSE-NEXT:    pmovmskb %xmm2, %edx
1386; SSE-NEXT:    shll $16, %edx
1387; SSE-NEXT:    orl %ecx, %edx
1388; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm5
1389; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm4
1390; SSE-NEXT:    packsswb %xmm5, %xmm4
1391; SSE-NEXT:    pmovmskb %xmm4, %ecx
1392; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm7
1393; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm6
1394; SSE-NEXT:    packsswb %xmm7, %xmm6
1395; SSE-NEXT:    pmovmskb %xmm6, %esi
1396; SSE-NEXT:    shll $16, %esi
1397; SSE-NEXT:    orl %ecx, %esi
1398; SSE-NEXT:    shlq $32, %rsi
1399; SSE-NEXT:    orq %rdx, %rsi
1400; SSE-NEXT:    movq %rsi, (%rdi)
1401; SSE-NEXT:    retq
1402;
1403; AVX1-LABEL: test_cmp_v64i16:
1404; AVX1:       # %bb.0:
1405; AVX1-NEXT:    movq %rdi, %rax
1406; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm8
1407; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm9
1408; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
1409; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm0
1410; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
1411; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1412; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm0
1413; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1414; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm4, %xmm0
1415; AVX1-NEXT:    vpcmpgtw %xmm5, %xmm1, %xmm1
1416; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
1417; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1418; AVX1-NEXT:    shll $16, %edx
1419; AVX1-NEXT:    orl %ecx, %edx
1420; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm0
1421; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
1422; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
1423; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm2, %xmm1
1424; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
1425; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1426; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm0
1427; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
1428; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
1429; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm3, %xmm1
1430; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
1431; AVX1-NEXT:    vpmovmskb %xmm0, %esi
1432; AVX1-NEXT:    shll $16, %esi
1433; AVX1-NEXT:    orl %ecx, %esi
1434; AVX1-NEXT:    shlq $32, %rsi
1435; AVX1-NEXT:    orq %rdx, %rsi
1436; AVX1-NEXT:    movq %rsi, (%rdi)
1437; AVX1-NEXT:    vzeroupper
1438; AVX1-NEXT:    retq
1439;
1440; AVX2-LABEL: test_cmp_v64i16:
1441; AVX2:       # %bb.0:
1442; AVX2-NEXT:    movq %rdi, %rax
1443; AVX2-NEXT:    vpcmpgtw %ymm5, %ymm1, %ymm1
1444; AVX2-NEXT:    vpcmpgtw %ymm4, %ymm0, %ymm0
1445; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
1446; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1447; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
1448; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm3, %ymm0
1449; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm2, %ymm1
1450; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
1451; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1452; AVX2-NEXT:    vpmovmskb %ymm0, %edx
1453; AVX2-NEXT:    shlq $32, %rdx
1454; AVX2-NEXT:    orq %rcx, %rdx
1455; AVX2-NEXT:    movq %rdx, (%rdi)
1456; AVX2-NEXT:    vzeroupper
1457; AVX2-NEXT:    retq
1458;
1459; AVX512F-LABEL: test_cmp_v64i16:
1460; AVX512F:       # %bb.0:
1461; AVX512F-NEXT:    movq %rdi, %rax
1462; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm4
1463; AVX512F-NEXT:    vpmovsxwd %ymm4, %zmm4
1464; AVX512F-NEXT:    vptestmd %zmm4, %zmm4, %k0
1465; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm2
1466; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1467; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
1468; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1469; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1470; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm0
1471; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1472; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
1473; AVX512F-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
1474; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1475; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
1476; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1477; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k3
1478; AVX512F-NEXT:    kmovw %k3, 6(%rdi)
1479; AVX512F-NEXT:    kmovw %k2, 4(%rdi)
1480; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
1481; AVX512F-NEXT:    kmovw %k0, (%rdi)
1482; AVX512F-NEXT:    vzeroupper
1483; AVX512F-NEXT:    retq
1484;
1485; AVX512DQ-LABEL: test_cmp_v64i16:
1486; AVX512DQ:       # %bb.0:
1487; AVX512DQ-NEXT:    movq %rdi, %rax
1488; AVX512DQ-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm4
1489; AVX512DQ-NEXT:    vpmovsxwd %ymm4, %zmm4
1490; AVX512DQ-NEXT:    vpmovd2m %zmm4, %k0
1491; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm2
1492; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1493; AVX512DQ-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
1494; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
1495; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
1496; AVX512DQ-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm0
1497; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
1498; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k2
1499; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
1500; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1501; AVX512DQ-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
1502; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
1503; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
1504; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
1505; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
1506; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
1507; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
1508; AVX512DQ-NEXT:    vzeroupper
1509; AVX512DQ-NEXT:    retq
1510;
1511; AVX512BW-LABEL: test_cmp_v64i16:
1512; AVX512BW:       # %bb.0:
1513; AVX512BW-NEXT:    vpcmpgtw %zmm2, %zmm0, %k0
1514; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm1, %k1
1515; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0
1516; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1517; AVX512BW-NEXT:    retq
1518  %1 = icmp sgt <64 x i16> %a0, %a1
1519  ret <64 x i1> %1
1520}
1521
1522define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind {
1523; SSE-LABEL: test_cmp_v128i8:
1524; SSE:       # %bb.0:
1525; SSE-NEXT:    movq %rdi, %rax
1526; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm0
1527; SSE-NEXT:    pmovmskb %xmm0, %ecx
1528; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm1
1529; SSE-NEXT:    pmovmskb %xmm1, %edx
1530; SSE-NEXT:    shll $16, %edx
1531; SSE-NEXT:    orl %ecx, %edx
1532; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm2
1533; SSE-NEXT:    pmovmskb %xmm2, %esi
1534; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm3
1535; SSE-NEXT:    pmovmskb %xmm3, %ecx
1536; SSE-NEXT:    shll $16, %ecx
1537; SSE-NEXT:    orl %esi, %ecx
1538; SSE-NEXT:    shlq $32, %rcx
1539; SSE-NEXT:    orq %rdx, %rcx
1540; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm4
1541; SSE-NEXT:    pmovmskb %xmm4, %edx
1542; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm5
1543; SSE-NEXT:    pmovmskb %xmm5, %esi
1544; SSE-NEXT:    shll $16, %esi
1545; SSE-NEXT:    orl %edx, %esi
1546; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm6
1547; SSE-NEXT:    pmovmskb %xmm6, %edx
1548; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm7
1549; SSE-NEXT:    pmovmskb %xmm7, %edi
1550; SSE-NEXT:    shll $16, %edi
1551; SSE-NEXT:    orl %edx, %edi
1552; SSE-NEXT:    shlq $32, %rdi
1553; SSE-NEXT:    orq %rsi, %rdi
1554; SSE-NEXT:    movq %rdi, 8(%rax)
1555; SSE-NEXT:    movq %rcx, (%rax)
1556; SSE-NEXT:    retq
1557;
1558; AVX1-LABEL: test_cmp_v128i8:
1559; AVX1:       # %bb.0:
1560; AVX1-NEXT:    movq %rdi, %rax
1561; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm0, %xmm8
1562; AVX1-NEXT:    vpmovmskb %xmm8, %ecx
1563; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
1564; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1565; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm0, %xmm0
1566; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1567; AVX1-NEXT:    shll $16, %edx
1568; AVX1-NEXT:    orl %ecx, %edx
1569; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm1, %xmm0
1570; AVX1-NEXT:    vpmovmskb %xmm0, %esi
1571; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm0
1572; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
1573; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1574; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
1575; AVX1-NEXT:    shll $16, %ecx
1576; AVX1-NEXT:    orl %esi, %ecx
1577; AVX1-NEXT:    shlq $32, %rcx
1578; AVX1-NEXT:    orq %rdx, %rcx
1579; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm2, %xmm0
1580; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1581; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm0
1582; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
1583; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1584; AVX1-NEXT:    vpmovmskb %xmm0, %esi
1585; AVX1-NEXT:    shll $16, %esi
1586; AVX1-NEXT:    orl %edx, %esi
1587; AVX1-NEXT:    vpcmpgtb %xmm7, %xmm3, %xmm0
1588; AVX1-NEXT:    vpmovmskb %xmm0, %edx
1589; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm0
1590; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
1591; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1592; AVX1-NEXT:    vpmovmskb %xmm0, %edi
1593; AVX1-NEXT:    shll $16, %edi
1594; AVX1-NEXT:    orl %edx, %edi
1595; AVX1-NEXT:    shlq $32, %rdi
1596; AVX1-NEXT:    orq %rsi, %rdi
1597; AVX1-NEXT:    movq %rdi, 8(%rax)
1598; AVX1-NEXT:    movq %rcx, (%rax)
1599; AVX1-NEXT:    vzeroupper
1600; AVX1-NEXT:    retq
1601;
1602; AVX2-LABEL: test_cmp_v128i8:
1603; AVX2:       # %bb.0:
1604; AVX2-NEXT:    movq %rdi, %rax
1605; AVX2-NEXT:    vpcmpgtb %ymm4, %ymm0, %ymm0
1606; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
1607; AVX2-NEXT:    vpcmpgtb %ymm5, %ymm1, %ymm0
1608; AVX2-NEXT:    vpmovmskb %ymm0, %edx
1609; AVX2-NEXT:    shlq $32, %rdx
1610; AVX2-NEXT:    orq %rcx, %rdx
1611; AVX2-NEXT:    vpcmpgtb %ymm6, %ymm2, %ymm0
1612; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
1613; AVX2-NEXT:    vpcmpgtb %ymm7, %ymm3, %ymm0
1614; AVX2-NEXT:    vpmovmskb %ymm0, %esi
1615; AVX2-NEXT:    shlq $32, %rsi
1616; AVX2-NEXT:    orq %rcx, %rsi
1617; AVX2-NEXT:    movq %rsi, 8(%rdi)
1618; AVX2-NEXT:    movq %rdx, (%rdi)
1619; AVX2-NEXT:    vzeroupper
1620; AVX2-NEXT:    retq
1621;
1622; AVX512F-LABEL: test_cmp_v128i8:
1623; AVX512F:       # %bb.0:
1624; AVX512F-NEXT:    movq %rdi, %rax
1625; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm4
1626; AVX512F-NEXT:    vpmovsxbd %xmm4, %zmm5
1627; AVX512F-NEXT:    vptestmd %zmm5, %zmm5, %k0
1628; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm4
1629; AVX512F-NEXT:    vpmovsxbd %xmm4, %zmm4
1630; AVX512F-NEXT:    vptestmd %zmm4, %zmm4, %k1
1631; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm2
1632; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1633; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
1634; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm2
1635; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k2
1636; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
1637; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1638; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k3
1639; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm0
1640; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm2
1641; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k4
1642; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
1643; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1644; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k5
1645; AVX512F-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
1646; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1647; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
1648; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
1649; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k6
1650; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
1651; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1652; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k7
1653; AVX512F-NEXT:    kmovw %k7, 14(%rdi)
1654; AVX512F-NEXT:    kmovw %k6, 12(%rdi)
1655; AVX512F-NEXT:    kmovw %k5, 10(%rdi)
1656; AVX512F-NEXT:    kmovw %k4, 8(%rdi)
1657; AVX512F-NEXT:    kmovw %k3, 6(%rdi)
1658; AVX512F-NEXT:    kmovw %k2, 4(%rdi)
1659; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
1660; AVX512F-NEXT:    kmovw %k0, (%rdi)
1661; AVX512F-NEXT:    vzeroupper
1662; AVX512F-NEXT:    retq
1663;
1664; AVX512DQ-LABEL: test_cmp_v128i8:
1665; AVX512DQ:       # %bb.0:
1666; AVX512DQ-NEXT:    movq %rdi, %rax
1667; AVX512DQ-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm4
1668; AVX512DQ-NEXT:    vpmovsxbd %xmm4, %zmm5
1669; AVX512DQ-NEXT:    vpmovd2m %zmm5, %k0
1670; AVX512DQ-NEXT:    vextracti128 $1, %ymm4, %xmm4
1671; AVX512DQ-NEXT:    vpmovsxbd %xmm4, %zmm4
1672; AVX512DQ-NEXT:    vpmovd2m %zmm4, %k1
1673; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm2
1674; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1675; AVX512DQ-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
1676; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm2
1677; AVX512DQ-NEXT:    vpmovd2m %zmm2, %k2
1678; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1679; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
1680; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
1681; AVX512DQ-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm0
1682; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm2
1683; AVX512DQ-NEXT:    vpmovd2m %zmm2, %k4
1684; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1685; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
1686; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k5
1687; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
1688; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1689; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
1690; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
1691; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k6
1692; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1693; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
1694; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k7
1695; AVX512DQ-NEXT:    kmovw %k7, 14(%rdi)
1696; AVX512DQ-NEXT:    kmovw %k6, 12(%rdi)
1697; AVX512DQ-NEXT:    kmovw %k5, 10(%rdi)
1698; AVX512DQ-NEXT:    kmovw %k4, 8(%rdi)
1699; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
1700; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
1701; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
1702; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
1703; AVX512DQ-NEXT:    vzeroupper
1704; AVX512DQ-NEXT:    retq
1705;
1706; AVX512BW-LABEL: test_cmp_v128i8:
1707; AVX512BW:       # %bb.0:
1708; AVX512BW-NEXT:    movq %rdi, %rax
1709; AVX512BW-NEXT:    vpcmpgtb %zmm2, %zmm0, %k0
1710; AVX512BW-NEXT:    vpcmpgtb %zmm3, %zmm1, %k1
1711; AVX512BW-NEXT:    kmovq %k1, 8(%rdi)
1712; AVX512BW-NEXT:    kmovq %k0, (%rdi)
1713; AVX512BW-NEXT:    vzeroupper
1714; AVX512BW-NEXT:    retq
1715  %1 = icmp sgt <128 x i8> %a0, %a1
1716  ret <128 x i1> %1
1717}
1718
1719;
1720; 2048-bit vector comparisons
1721;
1722
; test_cmp_v32f64 - ordered greater-than compare (fcmp ogt) of two
; <32 x double> operands, returning the <32 x i1> result.
;
; The assertions inside this function are autogenerated (see the NOTE on the
; first line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
;
; Every configuration is expected to select the less-than predicate
; (cmpltpd / vcmpltpd) -- presumably with the operands commuted so that
; "a1 < a0" stands in for "a0 > a1"; confirm against the operand order.
; In the SSE expectations the 32 mask bits are gathered with two pmovmskb
; results and stored as a single 32-bit word through %rdi, while the AVX and
; AVX-512 expectations finish with the result in %ymm0.
1723define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind {
1724; SSE-LABEL: test_cmp_v32f64:
1725; SSE:       # %bb.0:
1726; SSE-NEXT:    movq %rdi, %rax
1727; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
1728; SSE-NEXT:    cmpltpd %xmm7, %xmm8
1729; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm7
1730; SSE-NEXT:    cmpltpd %xmm6, %xmm7
1731; SSE-NEXT:    packssdw %xmm8, %xmm7
1732; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm6
1733; SSE-NEXT:    cmpltpd %xmm5, %xmm6
1734; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm5
1735; SSE-NEXT:    cmpltpd %xmm4, %xmm5
1736; SSE-NEXT:    packssdw %xmm6, %xmm5
1737; SSE-NEXT:    packssdw %xmm7, %xmm5
1738; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm4
1739; SSE-NEXT:    cmpltpd %xmm3, %xmm4
1740; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
1741; SSE-NEXT:    cmpltpd %xmm2, %xmm3
1742; SSE-NEXT:    packssdw %xmm4, %xmm3
1743; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
1744; SSE-NEXT:    cmpltpd %xmm1, %xmm2
1745; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm4
1746; SSE-NEXT:    cmpltpd %xmm0, %xmm4
1747; SSE-NEXT:    packssdw %xmm2, %xmm4
1748; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm0
1749; SSE-NEXT:    packssdw %xmm3, %xmm4
1750; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
1751; SSE-NEXT:    packsswb %xmm5, %xmm4
1752; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
1753; SSE-NEXT:    pmovmskb %xmm4, %ecx
1754; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
1755; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm3
1756; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm2
1757; SSE-NEXT:    packssdw %xmm3, %xmm2
1758; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
1759; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm3
1760; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm1
1761; SSE-NEXT:    packssdw %xmm3, %xmm1
1762; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
1763; SSE-NEXT:    packssdw %xmm2, %xmm1
1764; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
1765; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm2
1766; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm3
1767; SSE-NEXT:    packssdw %xmm2, %xmm3
1768; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
1769; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm2
1770; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm0
1771; SSE-NEXT:    packssdw %xmm2, %xmm0
1772; SSE-NEXT:    packssdw %xmm3, %xmm0
1773; SSE-NEXT:    packsswb %xmm1, %xmm0
1774; SSE-NEXT:    pmovmskb %xmm0, %edx
1775; SSE-NEXT:    shll $16, %edx
1776; SSE-NEXT:    orl %ecx, %edx
1777; SSE-NEXT:    movl %edx, (%rdi)
1778; SSE-NEXT:    retq
1779;
1780; AVX1-LABEL: test_cmp_v32f64:
1781; AVX1:       # %bb.0:
1782; AVX1-NEXT:    pushq %rbp
1783; AVX1-NEXT:    movq %rsp, %rbp
1784; AVX1-NEXT:    andq $-32, %rsp
1785; AVX1-NEXT:    subq $32, %rsp
1786; AVX1-NEXT:    vmovapd 16(%rbp), %ymm8
1787; AVX1-NEXT:    vmovapd 48(%rbp), %ymm9
1788; AVX1-NEXT:    vmovapd 80(%rbp), %ymm10
1789; AVX1-NEXT:    vmovapd 112(%rbp), %ymm11
1790; AVX1-NEXT:    vmovapd 144(%rbp), %ymm12
1791; AVX1-NEXT:    vmovapd 176(%rbp), %ymm13
1792; AVX1-NEXT:    vmovapd 208(%rbp), %ymm14
1793; AVX1-NEXT:    vmovapd 240(%rbp), %ymm15
1794; AVX1-NEXT:    vcmpltpd %ymm7, %ymm15, %ymm7
1795; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm15
1796; AVX1-NEXT:    vpackssdw %xmm15, %xmm7, %xmm7
1797; AVX1-NEXT:    vcmpltpd %ymm6, %ymm14, %ymm6
1798; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm14
1799; AVX1-NEXT:    vpackssdw %xmm14, %xmm6, %xmm6
1800; AVX1-NEXT:    vpackssdw %xmm7, %xmm6, %xmm6
1801; AVX1-NEXT:    vcmpltpd %ymm5, %ymm13, %ymm5
1802; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm7
1803; AVX1-NEXT:    vpackssdw %xmm7, %xmm5, %xmm5
1804; AVX1-NEXT:    vcmpltpd %ymm4, %ymm12, %ymm4
1805; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
1806; AVX1-NEXT:    vpackssdw %xmm7, %xmm4, %xmm4
1807; AVX1-NEXT:    vpackssdw %xmm5, %xmm4, %xmm4
1808; AVX1-NEXT:    vpacksswb %xmm6, %xmm4, %xmm4
1809; AVX1-NEXT:    vcmpltpd %ymm3, %ymm11, %ymm3
1810; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
1811; AVX1-NEXT:    vpackssdw %xmm5, %xmm3, %xmm3
1812; AVX1-NEXT:    vcmpltpd %ymm2, %ymm10, %ymm2
1813; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
1814; AVX1-NEXT:    vpackssdw %xmm5, %xmm2, %xmm2
1815; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
1816; AVX1-NEXT:    vcmpltpd %ymm1, %ymm9, %ymm1
1817; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1818; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
1819; AVX1-NEXT:    vcmpltpd %ymm0, %ymm8, %ymm0
1820; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1821; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
1822; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1823; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
1824; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
1825; AVX1-NEXT:    movq %rbp, %rsp
1826; AVX1-NEXT:    popq %rbp
1827; AVX1-NEXT:    retq
1828;
1829; AVX2-LABEL: test_cmp_v32f64:
1830; AVX2:       # %bb.0:
1831; AVX2-NEXT:    pushq %rbp
1832; AVX2-NEXT:    movq %rsp, %rbp
1833; AVX2-NEXT:    andq $-32, %rsp
1834; AVX2-NEXT:    subq $32, %rsp
1835; AVX2-NEXT:    vmovapd 16(%rbp), %ymm8
1836; AVX2-NEXT:    vmovapd 48(%rbp), %ymm9
1837; AVX2-NEXT:    vmovapd 80(%rbp), %ymm10
1838; AVX2-NEXT:    vmovapd 112(%rbp), %ymm11
1839; AVX2-NEXT:    vmovapd 144(%rbp), %ymm12
1840; AVX2-NEXT:    vmovapd 176(%rbp), %ymm13
1841; AVX2-NEXT:    vmovapd 208(%rbp), %ymm14
1842; AVX2-NEXT:    vmovapd 240(%rbp), %ymm15
1843; AVX2-NEXT:    vcmpltpd %ymm7, %ymm15, %ymm7
1844; AVX2-NEXT:    vcmpltpd %ymm6, %ymm14, %ymm6
1845; AVX2-NEXT:    vpackssdw %ymm7, %ymm6, %ymm6
1846; AVX2-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[0,2,1,3]
1847; AVX2-NEXT:    vcmpltpd %ymm5, %ymm13, %ymm5
1848; AVX2-NEXT:    vcmpltpd %ymm4, %ymm12, %ymm4
1849; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
1850; AVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3]
1851; AVX2-NEXT:    vpackssdw %ymm6, %ymm4, %ymm4
1852; AVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3]
1853; AVX2-NEXT:    vcmpltpd %ymm3, %ymm11, %ymm3
1854; AVX2-NEXT:    vcmpltpd %ymm2, %ymm10, %ymm2
1855; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
1856; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
1857; AVX2-NEXT:    vcmpltpd %ymm1, %ymm9, %ymm1
1858; AVX2-NEXT:    vcmpltpd %ymm0, %ymm8, %ymm0
1859; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1860; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1861; AVX2-NEXT:    vpackssdw %ymm2, %ymm0, %ymm0
1862; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1863; AVX2-NEXT:    vpacksswb %ymm4, %ymm0, %ymm0
1864; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1865; AVX2-NEXT:    movq %rbp, %rsp
1866; AVX2-NEXT:    popq %rbp
1867; AVX2-NEXT:    retq
1868;
1869; AVX512F-LABEL: test_cmp_v32f64:
1870; AVX512F:       # %bb.0:
1871; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm6, %k0
1872; AVX512F-NEXT:    vcmpltpd %zmm3, %zmm7, %k1
1873; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
1874; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm4, %k0
1875; AVX512F-NEXT:    vcmpltpd %zmm1, %zmm5, %k2
1876; AVX512F-NEXT:    kunpckbw %k0, %k2, %k2
1877; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1878; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1879; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1880; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
1881; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1882; AVX512F-NEXT:    retq
1883;
1884; AVX512DQ-LABEL: test_cmp_v32f64:
1885; AVX512DQ:       # %bb.0:
1886; AVX512DQ-NEXT:    vcmpltpd %zmm2, %zmm6, %k0
1887; AVX512DQ-NEXT:    vcmpltpd %zmm3, %zmm7, %k1
1888; AVX512DQ-NEXT:    kunpckbw %k0, %k1, %k0
1889; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm4, %k1
1890; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm5, %k2
1891; AVX512DQ-NEXT:    kunpckbw %k1, %k2, %k1
1892; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm0
1893; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1894; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm1
1895; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
1896; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1897; AVX512DQ-NEXT:    retq
1898;
1899; AVX512BW-LABEL: test_cmp_v32f64:
1900; AVX512BW:       # %bb.0:
1901; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm4, %k0
1902; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm5, %k1
1903; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0
1904; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm6, %k1
1905; AVX512BW-NEXT:    vcmpltpd %zmm3, %zmm7, %k2
1906; AVX512BW-NEXT:    kunpckbw %k1, %k2, %k1
1907; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
1908; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1909; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1910; AVX512BW-NEXT:    retq
1911  %1 = fcmp ogt <32 x double> %a0, %a1
1912  ret <32 x i1> %1
1913}
1914
; test_cmp_v32i64 - signed greater-than compare (icmp sgt) of two
; <32 x i64> operands, returning the <32 x i1> result.
;
; The assertions inside this function are autogenerated (see the NOTE on the
; first line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
;
; The SSE2 expectations contain no 64-bit compare instruction: both operands
; are xor-ed with the constant [2147483648,2147483648] and the per-lane result
; is then assembled from pcmpgtd / pcmpeqd / pshufd / pand / por. The SSE4.2,
; AVX and AVX-512 expectations use pcmpgtq / vpcmpgtq directly.
; In both SSE variants the 32 mask bits are gathered with two pmovmskb results
; and stored as a single 32-bit word through %rdi, while the AVX and AVX-512
; expectations finish with the result in %ymm0.
1915define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind {
1916; SSE2-LABEL: test_cmp_v32i64:
1917; SSE2:       # %bb.0:
1918; SSE2-NEXT:    movq %rdi, %rax
1919; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
1920; SSE2-NEXT:    pxor %xmm8, %xmm7
1921; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
1922; SSE2-NEXT:    pxor %xmm8, %xmm9
1923; SSE2-NEXT:    movdqa %xmm7, %xmm10
1924; SSE2-NEXT:    pcmpgtd %xmm9, %xmm10
1925; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1926; SSE2-NEXT:    pcmpeqd %xmm7, %xmm9
1927; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
1928; SSE2-NEXT:    pand %xmm11, %xmm7
1929; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
1930; SSE2-NEXT:    por %xmm7, %xmm9
1931; SSE2-NEXT:    pxor %xmm8, %xmm6
1932; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
1933; SSE2-NEXT:    pxor %xmm8, %xmm7
1934; SSE2-NEXT:    movdqa %xmm6, %xmm10
1935; SSE2-NEXT:    pcmpgtd %xmm7, %xmm10
1936; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1937; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
1938; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1939; SSE2-NEXT:    pand %xmm11, %xmm7
1940; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm10[1,1,3,3]
1941; SSE2-NEXT:    por %xmm7, %xmm6
1942; SSE2-NEXT:    packssdw %xmm9, %xmm6
1943; SSE2-NEXT:    pxor %xmm8, %xmm5
1944; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
1945; SSE2-NEXT:    pxor %xmm8, %xmm7
1946; SSE2-NEXT:    movdqa %xmm5, %xmm9
1947; SSE2-NEXT:    pcmpgtd %xmm7, %xmm9
1948; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
1949; SSE2-NEXT:    pcmpeqd %xmm5, %xmm7
1950; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1951; SSE2-NEXT:    pand %xmm10, %xmm5
1952; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
1953; SSE2-NEXT:    por %xmm5, %xmm7
1954; SSE2-NEXT:    pxor %xmm8, %xmm4
1955; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
1956; SSE2-NEXT:    pxor %xmm8, %xmm5
1957; SSE2-NEXT:    movdqa %xmm4, %xmm9
1958; SSE2-NEXT:    pcmpgtd %xmm5, %xmm9
1959; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
1960; SSE2-NEXT:    pcmpeqd %xmm4, %xmm5
1961; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1962; SSE2-NEXT:    pand %xmm10, %xmm5
1963; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3]
1964; SSE2-NEXT:    por %xmm5, %xmm4
1965; SSE2-NEXT:    packssdw %xmm7, %xmm4
1966; SSE2-NEXT:    packssdw %xmm6, %xmm4
1967; SSE2-NEXT:    pxor %xmm8, %xmm3
1968; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
1969; SSE2-NEXT:    pxor %xmm8, %xmm5
1970; SSE2-NEXT:    movdqa %xmm3, %xmm6
1971; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
1972; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1973; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
1974; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
1975; SSE2-NEXT:    pand %xmm7, %xmm3
1976; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1977; SSE2-NEXT:    por %xmm3, %xmm5
1978; SSE2-NEXT:    pxor %xmm8, %xmm2
1979; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
1980; SSE2-NEXT:    pxor %xmm8, %xmm3
1981; SSE2-NEXT:    movdqa %xmm2, %xmm6
1982; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
1983; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1984; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
1985; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1986; SSE2-NEXT:    pand %xmm7, %xmm3
1987; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
1988; SSE2-NEXT:    por %xmm3, %xmm2
1989; SSE2-NEXT:    packssdw %xmm5, %xmm2
1990; SSE2-NEXT:    pxor %xmm8, %xmm1
1991; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
1992; SSE2-NEXT:    pxor %xmm8, %xmm3
1993; SSE2-NEXT:    movdqa %xmm1, %xmm5
1994; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
1995; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1996; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
1997; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1998; SSE2-NEXT:    pand %xmm6, %xmm1
1999; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
2000; SSE2-NEXT:    por %xmm1, %xmm3
2001; SSE2-NEXT:    pxor %xmm8, %xmm0
2002; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
2003; SSE2-NEXT:    pxor %xmm8, %xmm1
2004; SSE2-NEXT:    movdqa %xmm0, %xmm5
2005; SSE2-NEXT:    pcmpgtd %xmm1, %xmm5
2006; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2007; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
2008; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2009; SSE2-NEXT:    pand %xmm6, %xmm0
2010; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
2011; SSE2-NEXT:    por %xmm0, %xmm1
2012; SSE2-NEXT:    packssdw %xmm3, %xmm1
2013; SSE2-NEXT:    packssdw %xmm2, %xmm1
2014; SSE2-NEXT:    packsswb %xmm4, %xmm1
2015; SSE2-NEXT:    pmovmskb %xmm1, %ecx
2016; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
2017; SSE2-NEXT:    pxor %xmm8, %xmm0
2018; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
2019; SSE2-NEXT:    pxor %xmm8, %xmm1
2020; SSE2-NEXT:    movdqa %xmm1, %xmm2
2021; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
2022; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2023; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
2024; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2025; SSE2-NEXT:    pand %xmm3, %xmm0
2026; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2027; SSE2-NEXT:    por %xmm0, %xmm2
2028; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
2029; SSE2-NEXT:    pxor %xmm8, %xmm0
2030; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
2031; SSE2-NEXT:    pxor %xmm8, %xmm1
2032; SSE2-NEXT:    movdqa %xmm1, %xmm3
2033; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
2034; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2035; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
2036; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2037; SSE2-NEXT:    pand %xmm4, %xmm0
2038; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
2039; SSE2-NEXT:    por %xmm0, %xmm1
2040; SSE2-NEXT:    packssdw %xmm2, %xmm1
2041; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
2042; SSE2-NEXT:    pxor %xmm8, %xmm0
2043; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
2044; SSE2-NEXT:    pxor %xmm8, %xmm2
2045; SSE2-NEXT:    movdqa %xmm2, %xmm3
2046; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
2047; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2048; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
2049; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2050; SSE2-NEXT:    pand %xmm4, %xmm0
2051; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
2052; SSE2-NEXT:    por %xmm0, %xmm2
2053; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
2054; SSE2-NEXT:    pxor %xmm8, %xmm0
2055; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
2056; SSE2-NEXT:    pxor %xmm8, %xmm3
2057; SSE2-NEXT:    movdqa %xmm3, %xmm4
2058; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
2059; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2060; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
2061; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2062; SSE2-NEXT:    pand %xmm5, %xmm3
2063; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2064; SSE2-NEXT:    por %xmm3, %xmm0
2065; SSE2-NEXT:    packssdw %xmm2, %xmm0
2066; SSE2-NEXT:    packssdw %xmm1, %xmm0
2067; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
2068; SSE2-NEXT:    pxor %xmm8, %xmm1
2069; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
2070; SSE2-NEXT:    pxor %xmm8, %xmm2
2071; SSE2-NEXT:    movdqa %xmm2, %xmm3
2072; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
2073; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2074; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
2075; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
2076; SSE2-NEXT:    pand %xmm4, %xmm1
2077; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
2078; SSE2-NEXT:    por %xmm1, %xmm2
2079; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
2080; SSE2-NEXT:    pxor %xmm8, %xmm1
2081; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
2082; SSE2-NEXT:    pxor %xmm8, %xmm3
2083; SSE2-NEXT:    movdqa %xmm3, %xmm4
2084; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
2085; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2086; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
2087; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2088; SSE2-NEXT:    pand %xmm5, %xmm3
2089; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
2090; SSE2-NEXT:    por %xmm3, %xmm1
2091; SSE2-NEXT:    packssdw %xmm2, %xmm1
2092; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
2093; SSE2-NEXT:    pxor %xmm8, %xmm2
2094; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
2095; SSE2-NEXT:    pxor %xmm8, %xmm3
2096; SSE2-NEXT:    movdqa %xmm3, %xmm4
2097; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
2098; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2099; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
2100; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
2101; SSE2-NEXT:    pand %xmm5, %xmm2
2102; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2103; SSE2-NEXT:    por %xmm2, %xmm3
2104; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
2105; SSE2-NEXT:    pxor %xmm8, %xmm2
2106; SSE2-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm8
2107; SSE2-NEXT:    movdqa %xmm8, %xmm4
2108; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
2109; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2110; SSE2-NEXT:    pcmpeqd %xmm2, %xmm8
2111; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm8[1,1,3,3]
2112; SSE2-NEXT:    pand %xmm5, %xmm2
2113; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2114; SSE2-NEXT:    por %xmm2, %xmm4
2115; SSE2-NEXT:    packssdw %xmm3, %xmm4
2116; SSE2-NEXT:    packssdw %xmm1, %xmm4
2117; SSE2-NEXT:    packsswb %xmm0, %xmm4
2118; SSE2-NEXT:    pmovmskb %xmm4, %edx
2119; SSE2-NEXT:    shll $16, %edx
2120; SSE2-NEXT:    orl %ecx, %edx
2121; SSE2-NEXT:    movl %edx, (%rdi)
2122; SSE2-NEXT:    retq
2123;
2124; SSE42-LABEL: test_cmp_v32i64:
2125; SSE42:       # %bb.0:
2126; SSE42-NEXT:    movq %rdi, %rax
2127; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
2128; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
2129; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
2130; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
2131; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
2132; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
2133; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
2134; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
2135; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm7
2136; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm6
2137; SSE42-NEXT:    packssdw %xmm7, %xmm6
2138; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm5
2139; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm4
2140; SSE42-NEXT:    packssdw %xmm5, %xmm4
2141; SSE42-NEXT:    packssdw %xmm6, %xmm4
2142; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm3
2143; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm2
2144; SSE42-NEXT:    packssdw %xmm3, %xmm2
2145; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm1
2146; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm0
2147; SSE42-NEXT:    packssdw %xmm1, %xmm0
2148; SSE42-NEXT:    packssdw %xmm2, %xmm0
2149; SSE42-NEXT:    packsswb %xmm4, %xmm0
2150; SSE42-NEXT:    pmovmskb %xmm0, %ecx
2151; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm15
2152; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm13
2153; SSE42-NEXT:    packssdw %xmm15, %xmm13
2154; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm14
2155; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
2156; SSE42-NEXT:    packssdw %xmm14, %xmm9
2157; SSE42-NEXT:    packssdw %xmm13, %xmm9
2158; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm12
2159; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
2160; SSE42-NEXT:    packssdw %xmm12, %xmm10
2161; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
2162; SSE42-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
2163; SSE42-NEXT:    packssdw %xmm11, %xmm8
2164; SSE42-NEXT:    packssdw %xmm10, %xmm8
2165; SSE42-NEXT:    packsswb %xmm9, %xmm8
2166; SSE42-NEXT:    pmovmskb %xmm8, %edx
2167; SSE42-NEXT:    shll $16, %edx
2168; SSE42-NEXT:    orl %ecx, %edx
2169; SSE42-NEXT:    movl %edx, (%rdi)
2170; SSE42-NEXT:    retq
2171;
2172; AVX1-LABEL: test_cmp_v32i64:
2173; AVX1:       # %bb.0:
2174; AVX1-NEXT:    pushq %rbp
2175; AVX1-NEXT:    movq %rsp, %rbp
2176; AVX1-NEXT:    andq $-32, %rsp
2177; AVX1-NEXT:    subq $32, %rsp
2178; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
2179; AVX1-NEXT:    vpcmpgtq 256(%rbp), %xmm8, %xmm8
2180; AVX1-NEXT:    vpcmpgtq 240(%rbp), %xmm7, %xmm7
2181; AVX1-NEXT:    vpackssdw %xmm8, %xmm7, %xmm7
2182; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm8
2183; AVX1-NEXT:    vpcmpgtq 224(%rbp), %xmm8, %xmm8
2184; AVX1-NEXT:    vpcmpgtq 208(%rbp), %xmm6, %xmm6
2185; AVX1-NEXT:    vpackssdw %xmm8, %xmm6, %xmm6
2186; AVX1-NEXT:    vpackssdw %xmm7, %xmm6, %xmm6
2187; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm7
2188; AVX1-NEXT:    vpcmpgtq 192(%rbp), %xmm7, %xmm7
2189; AVX1-NEXT:    vpcmpgtq 176(%rbp), %xmm5, %xmm5
2190; AVX1-NEXT:    vpackssdw %xmm7, %xmm5, %xmm5
2191; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
2192; AVX1-NEXT:    vpcmpgtq 160(%rbp), %xmm7, %xmm7
2193; AVX1-NEXT:    vpcmpgtq 144(%rbp), %xmm4, %xmm4
2194; AVX1-NEXT:    vpackssdw %xmm7, %xmm4, %xmm4
2195; AVX1-NEXT:    vpackssdw %xmm5, %xmm4, %xmm4
2196; AVX1-NEXT:    vpacksswb %xmm6, %xmm4, %xmm4
2197; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
2198; AVX1-NEXT:    vpcmpgtq 128(%rbp), %xmm5, %xmm5
2199; AVX1-NEXT:    vpcmpgtq 112(%rbp), %xmm3, %xmm3
2200; AVX1-NEXT:    vpackssdw %xmm5, %xmm3, %xmm3
2201; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
2202; AVX1-NEXT:    vpcmpgtq 96(%rbp), %xmm5, %xmm5
2203; AVX1-NEXT:    vpcmpgtq 80(%rbp), %xmm2, %xmm2
2204; AVX1-NEXT:    vpackssdw %xmm5, %xmm2, %xmm2
2205; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
2206; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2207; AVX1-NEXT:    vpcmpgtq 64(%rbp), %xmm3, %xmm3
2208; AVX1-NEXT:    vpcmpgtq 48(%rbp), %xmm1, %xmm1
2209; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
2210; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2211; AVX1-NEXT:    vpcmpgtq 32(%rbp), %xmm3, %xmm3
2212; AVX1-NEXT:    vpcmpgtq 16(%rbp), %xmm0, %xmm0
2213; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
2214; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2215; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
2216; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
2217; AVX1-NEXT:    movq %rbp, %rsp
2218; AVX1-NEXT:    popq %rbp
2219; AVX1-NEXT:    retq
2220;
2221; AVX2-LABEL: test_cmp_v32i64:
2222; AVX2:       # %bb.0:
2223; AVX2-NEXT:    pushq %rbp
2224; AVX2-NEXT:    movq %rsp, %rbp
2225; AVX2-NEXT:    andq $-32, %rsp
2226; AVX2-NEXT:    subq $32, %rsp
2227; AVX2-NEXT:    vpcmpgtq 240(%rbp), %ymm7, %ymm7
2228; AVX2-NEXT:    vpcmpgtq 208(%rbp), %ymm6, %ymm6
2229; AVX2-NEXT:    vpackssdw %ymm7, %ymm6, %ymm6
2230; AVX2-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[0,2,1,3]
2231; AVX2-NEXT:    vpcmpgtq 176(%rbp), %ymm5, %ymm5
2232; AVX2-NEXT:    vpcmpgtq 144(%rbp), %ymm4, %ymm4
2233; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
2234; AVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3]
2235; AVX2-NEXT:    vpackssdw %ymm6, %ymm4, %ymm4
2236; AVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3]
2237; AVX2-NEXT:    vpcmpgtq 112(%rbp), %ymm3, %ymm3
2238; AVX2-NEXT:    vpcmpgtq 80(%rbp), %ymm2, %ymm2
2239; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
2240; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2241; AVX2-NEXT:    vpcmpgtq 48(%rbp), %ymm1, %ymm1
2242; AVX2-NEXT:    vpcmpgtq 16(%rbp), %ymm0, %ymm0
2243; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2244; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2245; AVX2-NEXT:    vpackssdw %ymm2, %ymm0, %ymm0
2246; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2247; AVX2-NEXT:    vpacksswb %ymm4, %ymm0, %ymm0
2248; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2249; AVX2-NEXT:    movq %rbp, %rsp
2250; AVX2-NEXT:    popq %rbp
2251; AVX2-NEXT:    retq
2252;
2253; AVX512F-LABEL: test_cmp_v32i64:
2254; AVX512F:       # %bb.0:
2255; AVX512F-NEXT:    vpcmpgtq %zmm6, %zmm2, %k0
2256; AVX512F-NEXT:    vpcmpgtq %zmm7, %zmm3, %k1
2257; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
2258; AVX512F-NEXT:    vpcmpgtq %zmm4, %zmm0, %k0
2259; AVX512F-NEXT:    vpcmpgtq %zmm5, %zmm1, %k2
2260; AVX512F-NEXT:    kunpckbw %k0, %k2, %k2
2261; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
2262; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
2263; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
2264; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
2265; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2266; AVX512F-NEXT:    retq
2267;
2268; AVX512DQ-LABEL: test_cmp_v32i64:
2269; AVX512DQ:       # %bb.0:
2270; AVX512DQ-NEXT:    vpcmpgtq %zmm6, %zmm2, %k0
2271; AVX512DQ-NEXT:    vpcmpgtq %zmm7, %zmm3, %k1
2272; AVX512DQ-NEXT:    kunpckbw %k0, %k1, %k0
2273; AVX512DQ-NEXT:    vpcmpgtq %zmm4, %zmm0, %k1
2274; AVX512DQ-NEXT:    vpcmpgtq %zmm5, %zmm1, %k2
2275; AVX512DQ-NEXT:    kunpckbw %k1, %k2, %k1
2276; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm0
2277; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
2278; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm1
2279; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
2280; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2281; AVX512DQ-NEXT:    retq
2282;
2283; AVX512BW-LABEL: test_cmp_v32i64:
2284; AVX512BW:       # %bb.0:
2285; AVX512BW-NEXT:    vpcmpgtq %zmm4, %zmm0, %k0
2286; AVX512BW-NEXT:    vpcmpgtq %zmm5, %zmm1, %k1
2287; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0
2288; AVX512BW-NEXT:    vpcmpgtq %zmm6, %zmm2, %k1
2289; AVX512BW-NEXT:    vpcmpgtq %zmm7, %zmm3, %k2
2290; AVX512BW-NEXT:    kunpckbw %k1, %k2, %k1
2291; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
2292; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
2293; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2294; AVX512BW-NEXT:    retq
2295  %1 = icmp sgt <32 x i64> %a0, %a1
2296  ret <32 x i1> %1
2297}
2298