xref: /llvm-project/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (revision a70d5e25f32ebd5f1d1c394312036a37591e998b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
4
; Equality-compares %__a and %__b as <16 x i8>, widens the <16 x i1> result to
; <32 x i1> (shuffle indices 16-31 pick lanes of the zeroinitializer operand,
; so the upper 16 bits are zero), and returns the mask bits as an i32.
5define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
7; VLX:       # %bb.0: # %entry
8; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
9; VLX-NEXT:    kmovd %k0, %eax
10; VLX-NEXT:    retq
11;
12; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
13; NoVLX:       # %bb.0: # %entry
14; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
15; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
16; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
17; NoVLX-NEXT:    kmovw %k0, %eax
18; NoVLX-NEXT:    vzeroupper
19; NoVLX-NEXT:    retq
20entry:
21  %0 = bitcast <2 x i64> %__a to <16 x i8>
22  %1 = bitcast <2 x i64> %__b to <16 x i8>
23  %2 = icmp eq <16 x i8> %0, %1
24  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
25  %4 = bitcast <32 x i1> %3 to i32
26  ret i32 %4
27}
28
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <16 x i8>, widens the <16 x i1> result to <32 x i1> (shuffle indices 16-31
; pick lanes of the zeroinitializer operand), and returns the bits as an i32.
29define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
30; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
31; VLX:       # %bb.0: # %entry
32; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
33; VLX-NEXT:    kmovd %k0, %eax
34; VLX-NEXT:    retq
35;
36; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
37; NoVLX:       # %bb.0: # %entry
38; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
39; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
40; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
41; NoVLX-NEXT:    kmovw %k0, %eax
42; NoVLX-NEXT:    vzeroupper
43; NoVLX-NEXT:    retq
44entry:
45  %0 = bitcast <2 x i64> %__a to <16 x i8>
46  %load = load <2 x i64>, ptr %__b
47  %1 = bitcast <2 x i64> %load to <16 x i8>
48  %2 = icmp eq <16 x i8> %0, %1
49  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
50  %4 = bitcast <32 x i1> %3 to i32
51  ret i32 %4
52}
53
; Equality-compares %__a and %__b as <16 x i8>, ANDs the <16 x i1> result with
; %__u reinterpreted as <16 x i1>, widens to <32 x i1> (shuffle indices 16-31
; pick lanes of the zeroinitializer operand), and returns the bits as an i32.
54define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
55; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
56; VLX:       # %bb.0: # %entry
57; VLX-NEXT:    kmovd %edi, %k1
58; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
59; VLX-NEXT:    kmovd %k0, %eax
60; VLX-NEXT:    retq
61;
62; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
63; NoVLX:       # %bb.0: # %entry
64; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
65; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
66; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
67; NoVLX-NEXT:    kmovw %k0, %eax
68; NoVLX-NEXT:    andl %edi, %eax
69; NoVLX-NEXT:    vzeroupper
70; NoVLX-NEXT:    retq
71entry:
72  %0 = bitcast <2 x i64> %__a to <16 x i8>
73  %1 = bitcast <2 x i64> %__b to <16 x i8>
74  %2 = icmp eq <16 x i8> %0, %1
75  %3 = bitcast i16 %__u to <16 x i1>
76  %4 = and <16 x i1> %2, %3
77  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
78  %6 = bitcast <32 x i1> %5 to i32
79  ret i32 %6
80}
81
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <16 x i8>, ANDs the <16 x i1> result with %__u reinterpreted as <16 x i1>,
; widens to <32 x i1> (shuffle indices 16-31 pick lanes of the zeroinitializer
; operand), and returns the bits as an i32.
82define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
83; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
84; VLX:       # %bb.0: # %entry
85; VLX-NEXT:    kmovd %edi, %k1
86; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
87; VLX-NEXT:    kmovd %k0, %eax
88; VLX-NEXT:    retq
89;
90; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
91; NoVLX:       # %bb.0: # %entry
92; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
93; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
94; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
95; NoVLX-NEXT:    kmovw %k0, %eax
96; NoVLX-NEXT:    andl %edi, %eax
97; NoVLX-NEXT:    vzeroupper
98; NoVLX-NEXT:    retq
99entry:
100  %0 = bitcast <2 x i64> %__a to <16 x i8>
101  %load = load <2 x i64>, ptr %__b
102  %1 = bitcast <2 x i64> %load to <16 x i8>
103  %2 = icmp eq <16 x i8> %0, %1
104  %3 = bitcast i16 %__u to <16 x i1>
105  %4 = and <16 x i1> %2, %3
106  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
107  %6 = bitcast <32 x i1> %5 to i32
108  ret i32 %6
109}
110
111
; Equality-compares %__a and %__b as <16 x i8>, widens the <16 x i1> result to
; <64 x i1> (every shuffle index past the first 16 is in 16-31, i.e. a lane of
; the zeroinitializer operand), and returns the mask bits as an i64.
112define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
113; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
114; VLX:       # %bb.0: # %entry
115; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
116; VLX-NEXT:    kmovq %k0, %rax
117; VLX-NEXT:    retq
118;
119; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
120; NoVLX:       # %bb.0: # %entry
121; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
122; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
123; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
124; NoVLX-NEXT:    kmovw %k0, %eax
125; NoVLX-NEXT:    vzeroupper
126; NoVLX-NEXT:    retq
127entry:
128  %0 = bitcast <2 x i64> %__a to <16 x i8>
129  %1 = bitcast <2 x i64> %__b to <16 x i8>
130  %2 = icmp eq <16 x i8> %0, %1
131  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132  %4 = bitcast <64 x i1> %3 to i64
133  ret i64 %4
134}
135
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <16 x i8>, widens the <16 x i1> result to <64 x i1> (every shuffle index past
; the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand), and
; returns the mask bits as an i64.
136define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
137; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
138; VLX:       # %bb.0: # %entry
139; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
140; VLX-NEXT:    kmovq %k0, %rax
141; VLX-NEXT:    retq
142;
143; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
144; NoVLX:       # %bb.0: # %entry
145; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
146; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
147; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
148; NoVLX-NEXT:    kmovw %k0, %eax
149; NoVLX-NEXT:    vzeroupper
150; NoVLX-NEXT:    retq
151entry:
152  %0 = bitcast <2 x i64> %__a to <16 x i8>
153  %load = load <2 x i64>, ptr %__b
154  %1 = bitcast <2 x i64> %load to <16 x i8>
155  %2 = icmp eq <16 x i8> %0, %1
156  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
157  %4 = bitcast <64 x i1> %3 to i64
158  ret i64 %4
159}
160
; Equality-compares %__a and %__b as <16 x i8>, ANDs the <16 x i1> result with
; %__u reinterpreted as <16 x i1>, widens to <64 x i1> (every shuffle index
; past the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand),
; and returns the mask bits as an i64.
161define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
162; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
163; VLX:       # %bb.0: # %entry
164; VLX-NEXT:    kmovd %edi, %k1
165; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
166; VLX-NEXT:    kmovq %k0, %rax
167; VLX-NEXT:    retq
168;
169; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
170; NoVLX:       # %bb.0: # %entry
171; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
172; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
173; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
174; NoVLX-NEXT:    kmovw %k0, %eax
175; NoVLX-NEXT:    andl %edi, %eax
176; NoVLX-NEXT:    vzeroupper
177; NoVLX-NEXT:    retq
178entry:
179  %0 = bitcast <2 x i64> %__a to <16 x i8>
180  %1 = bitcast <2 x i64> %__b to <16 x i8>
181  %2 = icmp eq <16 x i8> %0, %1
182  %3 = bitcast i16 %__u to <16 x i1>
183  %4 = and <16 x i1> %2, %3
184  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
185  %6 = bitcast <64 x i1> %5 to i64
186  ret i64 %6
187}
188
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <16 x i8>, ANDs the <16 x i1> result with %__u reinterpreted as <16 x i1>,
; widens to <64 x i1> (every shuffle index past the first 16 is in 16-31, i.e.
; a lane of the zeroinitializer operand), and returns the bits as an i64.
189define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
190; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
191; VLX:       # %bb.0: # %entry
192; VLX-NEXT:    kmovd %edi, %k1
193; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
194; VLX-NEXT:    kmovq %k0, %rax
195; VLX-NEXT:    retq
196;
197; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
198; NoVLX:       # %bb.0: # %entry
199; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
200; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
201; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
202; NoVLX-NEXT:    kmovw %k0, %eax
203; NoVLX-NEXT:    andl %edi, %eax
204; NoVLX-NEXT:    vzeroupper
205; NoVLX-NEXT:    retq
206entry:
207  %0 = bitcast <2 x i64> %__a to <16 x i8>
208  %load = load <2 x i64>, ptr %__b
209  %1 = bitcast <2 x i64> %load to <16 x i8>
210  %2 = icmp eq <16 x i8> %0, %1
211  %3 = bitcast i16 %__u to <16 x i1>
212  %4 = and <16 x i1> %2, %3
213  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
214  %6 = bitcast <64 x i1> %5 to i64
215  ret i64 %6
216}
217
218
; Equality-compares %__a and %__b as <32 x i8>, widens the <32 x i1> result to
; <64 x i1> (shuffle indices 32-63 pick lanes of the zeroinitializer operand,
; so the upper 32 bits are zero), and returns the mask bits as an i64.
219define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
220; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
221; VLX:       # %bb.0: # %entry
222; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
223; VLX-NEXT:    kmovq %k0, %rax
224; VLX-NEXT:    vzeroupper
225; VLX-NEXT:    retq
226;
227; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
228; NoVLX:       # %bb.0: # %entry
229; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
230; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
231; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
232; NoVLX-NEXT:    kmovw %k0, %ecx
233; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
234; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
235; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
236; NoVLX-NEXT:    kmovw %k0, %eax
237; NoVLX-NEXT:    shll $16, %eax
238; NoVLX-NEXT:    orl %ecx, %eax
239; NoVLX-NEXT:    vzeroupper
240; NoVLX-NEXT:    retq
241entry:
242  %0 = bitcast <4 x i64> %__a to <32 x i8>
243  %1 = bitcast <4 x i64> %__b to <32 x i8>
244  %2 = icmp eq <32 x i8> %0, %1
245  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
246  %4 = bitcast <64 x i1> %3 to i64
247  ret i64 %4
248}
249
; Equality-compares %__a with the <4 x i64> loaded from %__b, both viewed as
; <32 x i8>, widens the <32 x i1> result to <64 x i1> (shuffle indices 32-63
; pick lanes of the zeroinitializer operand), and returns the bits as an i64.
250define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
251; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
252; VLX:       # %bb.0: # %entry
253; VLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %k0
254; VLX-NEXT:    kmovq %k0, %rax
255; VLX-NEXT:    vzeroupper
256; VLX-NEXT:    retq
257;
258; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
259; NoVLX:       # %bb.0: # %entry
260; NoVLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0
261; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
262; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
263; NoVLX-NEXT:    kmovw %k0, %ecx
264; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
265; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
266; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
267; NoVLX-NEXT:    kmovw %k0, %eax
268; NoVLX-NEXT:    shll $16, %eax
269; NoVLX-NEXT:    orl %ecx, %eax
270; NoVLX-NEXT:    vzeroupper
271; NoVLX-NEXT:    retq
272entry:
273  %0 = bitcast <4 x i64> %__a to <32 x i8>
274  %load = load <4 x i64>, ptr %__b
275  %1 = bitcast <4 x i64> %load to <32 x i8>
276  %2 = icmp eq <32 x i8> %0, %1
277  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
278  %4 = bitcast <64 x i1> %3 to i64
279  ret i64 %4
280}
281
; Equality-compares %__a and %__b as <32 x i8>, ANDs the <32 x i1> result with
; %__u reinterpreted as <32 x i1>, widens to <64 x i1> (shuffle indices 32-63
; pick lanes of the zeroinitializer operand), and returns the bits as an i64.
282define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
283; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
284; VLX:       # %bb.0: # %entry
285; VLX-NEXT:    kmovd %edi, %k1
286; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
287; VLX-NEXT:    kmovq %k0, %rax
288; VLX-NEXT:    vzeroupper
289; VLX-NEXT:    retq
290;
291; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
292; NoVLX:       # %bb.0: # %entry
293; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
294; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
295; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
296; NoVLX-NEXT:    kmovw %k0, %eax
297; NoVLX-NEXT:    andl %edi, %eax
298; NoVLX-NEXT:    shrl $16, %edi
299; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
300; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
301; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
302; NoVLX-NEXT:    kmovw %k0, %ecx
303; NoVLX-NEXT:    andl %edi, %ecx
304; NoVLX-NEXT:    shll $16, %ecx
305; NoVLX-NEXT:    movzwl %ax, %eax
306; NoVLX-NEXT:    orl %ecx, %eax
307; NoVLX-NEXT:    vzeroupper
308; NoVLX-NEXT:    retq
309entry:
310  %0 = bitcast <4 x i64> %__a to <32 x i8>
311  %1 = bitcast <4 x i64> %__b to <32 x i8>
312  %2 = icmp eq <32 x i8> %0, %1
313  %3 = bitcast i32 %__u to <32 x i1>
314  %4 = and <32 x i1> %2, %3
315  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
316  %6 = bitcast <64 x i1> %5 to i64
317  ret i64 %6
318}
319
; Equality-compares %__a with the <4 x i64> loaded from %__b, both viewed as
; <32 x i8>, ANDs the <32 x i1> result with %__u reinterpreted as <32 x i1>,
; widens to <64 x i1> (shuffle indices 32-63 pick lanes of the zeroinitializer
; operand), and returns the bits as an i64.
320define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
321; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
322; VLX:       # %bb.0: # %entry
323; VLX-NEXT:    kmovd %edi, %k1
324; VLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
325; VLX-NEXT:    kmovq %k0, %rax
326; VLX-NEXT:    vzeroupper
327; VLX-NEXT:    retq
328;
329; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
330; NoVLX:       # %bb.0: # %entry
331; NoVLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
332; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
333; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
334; NoVLX-NEXT:    kmovw %k0, %eax
335; NoVLX-NEXT:    andl %edi, %eax
336; NoVLX-NEXT:    shrl $16, %edi
337; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
338; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
339; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
340; NoVLX-NEXT:    kmovw %k0, %ecx
341; NoVLX-NEXT:    andl %edi, %ecx
342; NoVLX-NEXT:    shll $16, %ecx
343; NoVLX-NEXT:    movzwl %ax, %eax
344; NoVLX-NEXT:    orl %ecx, %eax
345; NoVLX-NEXT:    vzeroupper
346; NoVLX-NEXT:    retq
347entry:
348  %0 = bitcast <4 x i64> %__a to <32 x i8>
349  %load = load <4 x i64>, ptr %__b
350  %1 = bitcast <4 x i64> %load to <32 x i8>
351  %2 = icmp eq <32 x i8> %0, %1
352  %3 = bitcast i32 %__u to <32 x i1>
353  %4 = and <32 x i1> %2, %3
354  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
355  %6 = bitcast <64 x i1> %5 to i64
356  ret i64 %6
357}
358
359
; Equality-compares %__a and %__b as <8 x i16>, widens the <8 x i1> result to
; <16 x i1> (shuffle indices 8-15 pick lanes of the zeroinitializer operand,
; so the upper 8 bits are zero), and returns the mask bits as an i16.
360define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
361; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
362; VLX:       # %bb.0: # %entry
363; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
364; VLX-NEXT:    kmovd %k0, %eax
365; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
366; VLX-NEXT:    retq
367;
368; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
369; NoVLX:       # %bb.0: # %entry
370; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
371; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
372; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
373; NoVLX-NEXT:    kmovw %k0, %eax
374; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
375; NoVLX-NEXT:    vzeroupper
376; NoVLX-NEXT:    retq
377entry:
378  %0 = bitcast <2 x i64> %__a to <8 x i16>
379  %1 = bitcast <2 x i64> %__b to <8 x i16>
380  %2 = icmp eq <8 x i16> %0, %1
381  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
382  %4 = bitcast <16 x i1> %3 to i16
383  ret i16 %4
384}
385
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <8 x i16>, widens the <8 x i1> result to <16 x i1> (shuffle indices 8-15 pick
; lanes of the zeroinitializer operand), and returns the bits as an i16.
386define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
387; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
388; VLX:       # %bb.0: # %entry
389; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
390; VLX-NEXT:    kmovd %k0, %eax
391; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
392; VLX-NEXT:    retq
393;
394; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
395; NoVLX:       # %bb.0: # %entry
396; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
397; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
398; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
399; NoVLX-NEXT:    kmovw %k0, %eax
400; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
401; NoVLX-NEXT:    vzeroupper
402; NoVLX-NEXT:    retq
403entry:
404  %0 = bitcast <2 x i64> %__a to <8 x i16>
405  %load = load <2 x i64>, ptr %__b
406  %1 = bitcast <2 x i64> %load to <8 x i16>
407  %2 = icmp eq <8 x i16> %0, %1
408  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
409  %4 = bitcast <16 x i1> %3 to i16
410  ret i16 %4
411}
412
; Equality-compares %__a and %__b as <8 x i16>, ANDs the <8 x i1> result with
; %__u reinterpreted as <8 x i1>, widens to <16 x i1> (shuffle indices 8-15
; pick lanes of the zeroinitializer operand), and returns the bits as an i16.
413define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
414; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
415; VLX:       # %bb.0: # %entry
416; VLX-NEXT:    kmovd %edi, %k1
417; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
418; VLX-NEXT:    kmovd %k0, %eax
419; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
420; VLX-NEXT:    retq
421;
422; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
423; NoVLX:       # %bb.0: # %entry
424; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
425; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
426; NoVLX-NEXT:    kmovw %edi, %k1
427; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
428; NoVLX-NEXT:    kmovw %k0, %eax
429; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
430; NoVLX-NEXT:    vzeroupper
431; NoVLX-NEXT:    retq
432entry:
433  %0 = bitcast <2 x i64> %__a to <8 x i16>
434  %1 = bitcast <2 x i64> %__b to <8 x i16>
435  %2 = icmp eq <8 x i16> %0, %1
436  %3 = bitcast i8 %__u to <8 x i1>
437  %4 = and <8 x i1> %2, %3
438  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
439  %6 = bitcast <16 x i1> %5 to i16
440  ret i16 %6
441}
442
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <8 x i16>, ANDs the <8 x i1> result with %__u reinterpreted as <8 x i1>,
; widens to <16 x i1> (shuffle indices 8-15 pick lanes of the zeroinitializer
; operand), and returns the bits as an i16.
443define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
444; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
445; VLX:       # %bb.0: # %entry
446; VLX-NEXT:    kmovd %edi, %k1
447; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
448; VLX-NEXT:    kmovd %k0, %eax
449; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
450; VLX-NEXT:    retq
451;
452; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
453; NoVLX:       # %bb.0: # %entry
454; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
455; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
456; NoVLX-NEXT:    kmovw %edi, %k1
457; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
458; NoVLX-NEXT:    kmovw %k0, %eax
459; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
460; NoVLX-NEXT:    vzeroupper
461; NoVLX-NEXT:    retq
462entry:
463  %0 = bitcast <2 x i64> %__a to <8 x i16>
464  %load = load <2 x i64>, ptr %__b
465  %1 = bitcast <2 x i64> %load to <8 x i16>
466  %2 = icmp eq <8 x i16> %0, %1
467  %3 = bitcast i8 %__u to <8 x i1>
468  %4 = and <8 x i1> %2, %3
469  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470  %6 = bitcast <16 x i1> %5 to i16
471  ret i16 %6
472}
473
474
; Equality-compares %__a and %__b as <8 x i16>, widens the <8 x i1> result to
; <32 x i1> (every shuffle index past the first 8 is in 8-15, i.e. a lane of
; the zeroinitializer operand), and returns the mask bits as an i32.
475define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
476; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
477; VLX:       # %bb.0: # %entry
478; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
479; VLX-NEXT:    kmovd %k0, %eax
480; VLX-NEXT:    retq
481;
482; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
483; NoVLX:       # %bb.0: # %entry
484; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
485; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
486; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
487; NoVLX-NEXT:    kmovw %k0, %eax
488; NoVLX-NEXT:    vzeroupper
489; NoVLX-NEXT:    retq
490entry:
491  %0 = bitcast <2 x i64> %__a to <8 x i16>
492  %1 = bitcast <2 x i64> %__b to <8 x i16>
493  %2 = icmp eq <8 x i16> %0, %1
494  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
495  %4 = bitcast <32 x i1> %3 to i32
496  ret i32 %4
497}
498
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <8 x i16>, widens the <8 x i1> result to <32 x i1> (every shuffle index past
; the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand), and
; returns the mask bits as an i32.
499define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
500; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
501; VLX:       # %bb.0: # %entry
502; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
503; VLX-NEXT:    kmovd %k0, %eax
504; VLX-NEXT:    retq
505;
506; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
507; NoVLX:       # %bb.0: # %entry
508; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
509; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
510; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
511; NoVLX-NEXT:    kmovw %k0, %eax
512; NoVLX-NEXT:    vzeroupper
513; NoVLX-NEXT:    retq
514entry:
515  %0 = bitcast <2 x i64> %__a to <8 x i16>
516  %load = load <2 x i64>, ptr %__b
517  %1 = bitcast <2 x i64> %load to <8 x i16>
518  %2 = icmp eq <8 x i16> %0, %1
519  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
520  %4 = bitcast <32 x i1> %3 to i32
521  ret i32 %4
522}
523
; Equality-compares %__a and %__b as <8 x i16>, ANDs the <8 x i1> result with
; %__u reinterpreted as <8 x i1>, widens to <32 x i1> (every shuffle index past
; the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand), and
; returns the mask bits as an i32.
524define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
525; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
526; VLX:       # %bb.0: # %entry
527; VLX-NEXT:    kmovd %edi, %k1
528; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
529; VLX-NEXT:    kmovd %k0, %eax
530; VLX-NEXT:    retq
531;
532; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
533; NoVLX:       # %bb.0: # %entry
534; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
535; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
536; NoVLX-NEXT:    kmovw %edi, %k1
537; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
538; NoVLX-NEXT:    kmovw %k0, %eax
539; NoVLX-NEXT:    vzeroupper
540; NoVLX-NEXT:    retq
541entry:
542  %0 = bitcast <2 x i64> %__a to <8 x i16>
543  %1 = bitcast <2 x i64> %__b to <8 x i16>
544  %2 = icmp eq <8 x i16> %0, %1
545  %3 = bitcast i8 %__u to <8 x i1>
546  %4 = and <8 x i1> %2, %3
547  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
548  %6 = bitcast <32 x i1> %5 to i32
549  ret i32 %6
550}
551
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <8 x i16>, ANDs the <8 x i1> result with %__u reinterpreted as <8 x i1>,
; widens to <32 x i1> (every shuffle index past the first 8 is in 8-15, i.e. a
; lane of the zeroinitializer operand), and returns the bits as an i32.
552define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
553; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
554; VLX:       # %bb.0: # %entry
555; VLX-NEXT:    kmovd %edi, %k1
556; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
557; VLX-NEXT:    kmovd %k0, %eax
558; VLX-NEXT:    retq
559;
560; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
561; NoVLX:       # %bb.0: # %entry
562; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
563; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
564; NoVLX-NEXT:    kmovw %edi, %k1
565; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
566; NoVLX-NEXT:    kmovw %k0, %eax
567; NoVLX-NEXT:    vzeroupper
568; NoVLX-NEXT:    retq
569entry:
570  %0 = bitcast <2 x i64> %__a to <8 x i16>
571  %load = load <2 x i64>, ptr %__b
572  %1 = bitcast <2 x i64> %load to <8 x i16>
573  %2 = icmp eq <8 x i16> %0, %1
574  %3 = bitcast i8 %__u to <8 x i1>
575  %4 = and <8 x i1> %2, %3
576  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
577  %6 = bitcast <32 x i1> %5 to i32
578  ret i32 %6
579}
580
581
; Equality-compares %__a and %__b as <8 x i16>, widens the <8 x i1> result to
; <64 x i1> (every shuffle index past the first 8 is in 8-15, i.e. a lane of
; the zeroinitializer operand), and returns the mask bits as an i64.
581define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
582; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
583; VLX:       # %bb.0: # %entry
584; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
585; VLX-NEXT:    kmovq %k0, %rax
586; VLX-NEXT:    retq
587;
588; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
589; NoVLX:       # %bb.0: # %entry
590; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
591; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
592; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
593; NoVLX-NEXT:    kmovw %k0, %eax
594; NoVLX-NEXT:    vzeroupper
595; NoVLX-NEXT:    retq
596entry:
597  %0 = bitcast <2 x i64> %__a to <8 x i16>
598  %1 = bitcast <2 x i64> %__b to <8 x i16>
599  %2 = icmp eq <8 x i16> %0, %1
600  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
601  %4 = bitcast <64 x i1> %3 to i64
602  ret i64 %4
603}
605
; Equality-compares %__a with the <2 x i64> loaded from %__b, both viewed as
; <8 x i16>, widens the <8 x i1> result to <64 x i1> (every shuffle index past
; the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand), and
; returns the mask bits as an i64.
605define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
606; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
607; VLX:       # %bb.0: # %entry
608; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
609; VLX-NEXT:    kmovq %k0, %rax
610; VLX-NEXT:    retq
611;
612; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
613; NoVLX:       # %bb.0: # %entry
614; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
615; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
616; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
617; NoVLX-NEXT:    kmovw %k0, %eax
618; NoVLX-NEXT:    vzeroupper
619; NoVLX-NEXT:    retq
620entry:
621  %0 = bitcast <2 x i64> %__a to <8 x i16>
622  %load = load <2 x i64>, ptr %__b
623  %1 = bitcast <2 x i64> %load to <8 x i16>
624  %2 = icmp eq <8 x i16> %0, %1
625  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
626  %4 = bitcast <64 x i1> %3 to i64
627  ret i64 %4
628}
630
; Equality-compares %__a and %__b as <8 x i16>, ANDs the <8 x i1> result with
; %__u reinterpreted as <8 x i1>, widens to <64 x i1> (every shuffle index past
; the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand), and
; returns the mask bits as an i64.
631define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
632; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
633; VLX:       # %bb.0: # %entry
634; VLX-NEXT:    kmovd %edi, %k1
635; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
636; VLX-NEXT:    kmovq %k0, %rax
637; VLX-NEXT:    retq
638;
639; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
640; NoVLX:       # %bb.0: # %entry
641; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
642; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
643; NoVLX-NEXT:    kmovw %edi, %k1
644; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
645; NoVLX-NEXT:    kmovw %k0, %eax
646; NoVLX-NEXT:    vzeroupper
647; NoVLX-NEXT:    retq
648entry:
649  %0 = bitcast <2 x i64> %__a to <8 x i16>
650  %1 = bitcast <2 x i64> %__b to <8 x i16>
651  %2 = icmp eq <8 x i16> %0, %1
652  %3 = bitcast i8 %__u to <8 x i1>
653  %4 = and <8 x i1> %2, %3
654  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
655  %6 = bitcast <64 x i1> %5 to i64
656  ret i64 %6
657}
658
; Masked 8 x i16 equality compare of %__a against a 128-bit vector loaded from
; %__b. %__u supplies one mask bit per lane; the masked 8-lane result is
; widened to 64 lanes and returned as an i64 bitmask.
659define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
660; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
661; VLX:       # %bb.0: # %entry
662; VLX-NEXT:    kmovd %edi, %k1
663; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
664; VLX-NEXT:    kmovq %k0, %rax
665; VLX-NEXT:    retq
666;
667; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
668; NoVLX:       # %bb.0: # %entry
669; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
670; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
671; NoVLX-NEXT:    kmovw %edi, %k1
672; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
673; NoVLX-NEXT:    kmovw %k0, %eax
674; NoVLX-NEXT:    vzeroupper
675; NoVLX-NEXT:    retq
676entry:
677  %0 = bitcast <2 x i64> %__a to <8 x i16>
678  %load = load <2 x i64>, ptr %__b
679  %1 = bitcast <2 x i64> %load to <8 x i16>
680  %2 = icmp eq <8 x i16> %0, %1
  ; Reinterpret the i8 mask as 8 i1 lanes and AND it with the compare result.
681  %3 = bitcast i8 %__u to <8 x i1>
682  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
683  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
684  %6 = bitcast <64 x i1> %5 to i64
685  ret i64 %6
686}
687
688
; Unmasked 16 x i16 equality compare of two 256-bit register operands. The
; 16-lane i1 result is widened to 32 lanes and returned as an i32 bitmask.
689define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
690; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
691; VLX:       # %bb.0: # %entry
692; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
693; VLX-NEXT:    kmovd %k0, %eax
694; VLX-NEXT:    vzeroupper
695; VLX-NEXT:    retq
696;
697; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
698; NoVLX:       # %bb.0: # %entry
699; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
700; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
701; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
702; NoVLX-NEXT:    kmovw %k0, %eax
703; NoVLX-NEXT:    vzeroupper
704; NoVLX-NEXT:    retq
705entry:
706  %0 = bitcast <4 x i64> %__a to <16 x i16>
707  %1 = bitcast <4 x i64> %__b to <16 x i16>
708  %2 = icmp eq <16 x i16> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
709  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
710  %4 = bitcast <32 x i1> %3 to i32
711  ret i32 %4
712}
713
; Unmasked 16 x i16 equality compare of %__a against a 256-bit vector loaded
; from %__b. The 16-lane i1 result is widened to 32 lanes and returned as an
; i32 bitmask.
714define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
715; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
716; VLX:       # %bb.0: # %entry
717; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
718; VLX-NEXT:    kmovd %k0, %eax
719; VLX-NEXT:    vzeroupper
720; VLX-NEXT:    retq
721;
722; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
723; NoVLX:       # %bb.0: # %entry
724; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
725; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
726; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
727; NoVLX-NEXT:    kmovw %k0, %eax
728; NoVLX-NEXT:    vzeroupper
729; NoVLX-NEXT:    retq
730entry:
731  %0 = bitcast <4 x i64> %__a to <16 x i16>
732  %load = load <4 x i64>, ptr %__b
733  %1 = bitcast <4 x i64> %load to <16 x i16>
734  %2 = icmp eq <16 x i16> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
735  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
736  %4 = bitcast <32 x i1> %3 to i32
737  ret i32 %4
738}
739
; Masked 16 x i16 equality compare of two 256-bit register operands. %__u
; supplies one mask bit per lane; the masked 16-lane result is widened to 32
; lanes and returned as an i32 bitmask.
; In the expected output for the +avx512f-only configuration the mask is
; applied with a scalar andl after the compare result is moved to %eax.
740define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
741; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
742; VLX:       # %bb.0: # %entry
743; VLX-NEXT:    kmovd %edi, %k1
744; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
745; VLX-NEXT:    kmovd %k0, %eax
746; VLX-NEXT:    vzeroupper
747; VLX-NEXT:    retq
748;
749; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
750; NoVLX:       # %bb.0: # %entry
751; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
752; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
753; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
754; NoVLX-NEXT:    kmovw %k0, %eax
755; NoVLX-NEXT:    andl %edi, %eax
756; NoVLX-NEXT:    vzeroupper
757; NoVLX-NEXT:    retq
758entry:
759  %0 = bitcast <4 x i64> %__a to <16 x i16>
760  %1 = bitcast <4 x i64> %__b to <16 x i16>
761  %2 = icmp eq <16 x i16> %0, %1
  ; Reinterpret the i16 mask as 16 i1 lanes and AND it with the compare result.
762  %3 = bitcast i16 %__u to <16 x i1>
763  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
764  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
765  %6 = bitcast <32 x i1> %5 to i32
766  ret i32 %6
767}
768
; Masked 16 x i16 equality compare of %__a against a 256-bit vector loaded
; from %__b. %__u supplies one mask bit per lane; the masked 16-lane result is
; widened to 32 lanes and returned as an i32 bitmask.
769define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
770; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
771; VLX:       # %bb.0: # %entry
772; VLX-NEXT:    kmovd %edi, %k1
773; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
774; VLX-NEXT:    kmovd %k0, %eax
775; VLX-NEXT:    vzeroupper
776; VLX-NEXT:    retq
777;
778; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
779; NoVLX:       # %bb.0: # %entry
780; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
781; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
782; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
783; NoVLX-NEXT:    kmovw %k0, %eax
784; NoVLX-NEXT:    andl %edi, %eax
785; NoVLX-NEXT:    vzeroupper
786; NoVLX-NEXT:    retq
787entry:
788  %0 = bitcast <4 x i64> %__a to <16 x i16>
789  %load = load <4 x i64>, ptr %__b
790  %1 = bitcast <4 x i64> %load to <16 x i16>
791  %2 = icmp eq <16 x i16> %0, %1
  ; Reinterpret the i16 mask as 16 i1 lanes and AND it with the compare result.
792  %3 = bitcast i16 %__u to <16 x i1>
793  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
794  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
795  %6 = bitcast <32 x i1> %5 to i32
796  ret i32 %6
797}
798
799
; Unmasked 16 x i16 equality compare of two 256-bit register operands. The
; 16-lane i1 result is widened to 64 lanes and returned as an i64 bitmask.
800define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
801; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
802; VLX:       # %bb.0: # %entry
803; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
804; VLX-NEXT:    kmovq %k0, %rax
805; VLX-NEXT:    vzeroupper
806; VLX-NEXT:    retq
807;
808; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
809; NoVLX:       # %bb.0: # %entry
810; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
811; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
812; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
813; NoVLX-NEXT:    kmovw %k0, %eax
814; NoVLX-NEXT:    vzeroupper
815; NoVLX-NEXT:    retq
816entry:
817  %0 = bitcast <4 x i64> %__a to <16 x i16>
818  %1 = bitcast <4 x i64> %__b to <16 x i16>
819  %2 = icmp eq <16 x i16> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
820  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
821  %4 = bitcast <64 x i1> %3 to i64
822  ret i64 %4
823}
824
; Unmasked 16 x i16 equality compare of %__a against a 256-bit vector loaded
; from %__b. The 16-lane i1 result is widened to 64 lanes and returned as an
; i64 bitmask.
825define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
826; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
827; VLX:       # %bb.0: # %entry
828; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
829; VLX-NEXT:    kmovq %k0, %rax
830; VLX-NEXT:    vzeroupper
831; VLX-NEXT:    retq
832;
833; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834; NoVLX:       # %bb.0: # %entry
835; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
836; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
837; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
838; NoVLX-NEXT:    kmovw %k0, %eax
839; NoVLX-NEXT:    vzeroupper
840; NoVLX-NEXT:    retq
841entry:
842  %0 = bitcast <4 x i64> %__a to <16 x i16>
843  %load = load <4 x i64>, ptr %__b
844  %1 = bitcast <4 x i64> %load to <16 x i16>
845  %2 = icmp eq <16 x i16> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
846  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
847  %4 = bitcast <64 x i1> %3 to i64
848  ret i64 %4
849}
850
; Masked 16 x i16 equality compare of two 256-bit register operands. %__u
; supplies one mask bit per lane; the masked 16-lane result is widened to 64
; lanes and returned as an i64 bitmask.
851define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
852; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
853; VLX:       # %bb.0: # %entry
854; VLX-NEXT:    kmovd %edi, %k1
855; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
856; VLX-NEXT:    kmovq %k0, %rax
857; VLX-NEXT:    vzeroupper
858; VLX-NEXT:    retq
859;
860; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861; NoVLX:       # %bb.0: # %entry
862; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
863; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
864; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
865; NoVLX-NEXT:    kmovw %k0, %eax
866; NoVLX-NEXT:    andl %edi, %eax
867; NoVLX-NEXT:    vzeroupper
868; NoVLX-NEXT:    retq
869entry:
870  %0 = bitcast <4 x i64> %__a to <16 x i16>
871  %1 = bitcast <4 x i64> %__b to <16 x i16>
872  %2 = icmp eq <16 x i16> %0, %1
  ; Reinterpret the i16 mask as 16 i1 lanes and AND it with the compare result.
873  %3 = bitcast i16 %__u to <16 x i1>
874  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
875  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
876  %6 = bitcast <64 x i1> %5 to i64
877  ret i64 %6
878}
879
; Masked 16 x i16 equality compare of %__a against a 256-bit vector loaded
; from %__b. %__u supplies one mask bit per lane; the masked 16-lane result is
; widened to 64 lanes and returned as an i64 bitmask.
880define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
881; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
882; VLX:       # %bb.0: # %entry
883; VLX-NEXT:    kmovd %edi, %k1
884; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
885; VLX-NEXT:    kmovq %k0, %rax
886; VLX-NEXT:    vzeroupper
887; VLX-NEXT:    retq
888;
889; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890; NoVLX:       # %bb.0: # %entry
891; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
892; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
893; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
894; NoVLX-NEXT:    kmovw %k0, %eax
895; NoVLX-NEXT:    andl %edi, %eax
896; NoVLX-NEXT:    vzeroupper
897; NoVLX-NEXT:    retq
898entry:
899  %0 = bitcast <4 x i64> %__a to <16 x i16>
900  %load = load <4 x i64>, ptr %__b
901  %1 = bitcast <4 x i64> %load to <16 x i16>
902  %2 = icmp eq <16 x i16> %0, %1
  ; Reinterpret the i16 mask as 16 i1 lanes and AND it with the compare result.
903  %3 = bitcast i16 %__u to <16 x i1>
904  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
905  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
906  %6 = bitcast <64 x i1> %5 to i64
907  ret i64 %6
908}
909
910
; Unmasked 32 x i16 equality compare of two 512-bit register operands. The
; 32-lane i1 result is widened to 64 lanes and returned as an i64 bitmask.
; In the expected output for the +avx512f-only configuration the compare is
; done as two 256-bit halves whose 16-bit results are merged with shll/orl.
911define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
912; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
913; VLX:       # %bb.0: # %entry
914; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
915; VLX-NEXT:    kmovq %k0, %rax
916; VLX-NEXT:    vzeroupper
917; VLX-NEXT:    retq
918;
919; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
920; NoVLX:       # %bb.0: # %entry
921; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm2
922; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
923; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
924; NoVLX-NEXT:    kmovw %k0, %ecx
925; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
926; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
927; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
928; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
929; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
930; NoVLX-NEXT:    kmovw %k0, %eax
931; NoVLX-NEXT:    shll $16, %eax
932; NoVLX-NEXT:    orl %ecx, %eax
933; NoVLX-NEXT:    vzeroupper
934; NoVLX-NEXT:    retq
935entry:
936  %0 = bitcast <8 x i64> %__a to <32 x i16>
937  %1 = bitcast <8 x i64> %__b to <32 x i16>
938  %2 = icmp eq <32 x i16> %0, %1
  ; Indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
939  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
940  %4 = bitcast <64 x i1> %3 to i64
941  ret i64 %4
942}
943
; Unmasked 32 x i16 equality compare of %__a against a 512-bit vector loaded
; from %__b. The 32-lane i1 result is widened to 64 lanes and returned as an
; i64 bitmask.
; In the expected output for the +avx512f-only configuration the two 256-bit
; halves are compared against (%rdi) and 32(%rdi) respectively.
944define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
945; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
946; VLX:       # %bb.0: # %entry
947; VLX-NEXT:    vpcmpeqw (%rdi), %zmm0, %k0
948; VLX-NEXT:    kmovq %k0, %rax
949; VLX-NEXT:    vzeroupper
950; VLX-NEXT:    retq
951;
952; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
953; NoVLX:       # %bb.0: # %entry
954; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm1
955; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
956; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
957; NoVLX-NEXT:    kmovw %k0, %ecx
958; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
959; NoVLX-NEXT:    vpcmpeqw 32(%rdi), %ymm0, %ymm0
960; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
961; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
962; NoVLX-NEXT:    kmovw %k0, %eax
963; NoVLX-NEXT:    shll $16, %eax
964; NoVLX-NEXT:    orl %ecx, %eax
965; NoVLX-NEXT:    vzeroupper
966; NoVLX-NEXT:    retq
967entry:
968  %0 = bitcast <8 x i64> %__a to <32 x i16>
969  %load = load <8 x i64>, ptr %__b
970  %1 = bitcast <8 x i64> %load to <32 x i16>
971  %2 = icmp eq <32 x i16> %0, %1
  ; Indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
972  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
973  %4 = bitcast <64 x i1> %3 to i64
974  ret i64 %4
975}
976
; Masked 32 x i16 equality compare of two 512-bit register operands. %__u
; supplies one mask bit per lane; the masked 32-lane result is widened to 64
; lanes and returned as an i64 bitmask.
; In the expected output for the +avx512f-only configuration each 256-bit half
; is compared separately and ANDed with the matching 16 bits of %edi.
977define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
978; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
979; VLX:       # %bb.0: # %entry
980; VLX-NEXT:    kmovd %edi, %k1
981; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
982; VLX-NEXT:    kmovq %k0, %rax
983; VLX-NEXT:    vzeroupper
984; VLX-NEXT:    retq
985;
986; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987; NoVLX:       # %bb.0: # %entry
988; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm2
989; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
990; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
991; NoVLX-NEXT:    kmovw %k0, %eax
992; NoVLX-NEXT:    andl %edi, %eax
993; NoVLX-NEXT:    shrl $16, %edi
994; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
995; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
996; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
997; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
998; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
999; NoVLX-NEXT:    kmovw %k0, %ecx
1000; NoVLX-NEXT:    andl %edi, %ecx
1001; NoVLX-NEXT:    shll $16, %ecx
1002; NoVLX-NEXT:    movzwl %ax, %eax
1003; NoVLX-NEXT:    orl %ecx, %eax
1004; NoVLX-NEXT:    vzeroupper
1005; NoVLX-NEXT:    retq
1006entry:
1007  %0 = bitcast <8 x i64> %__a to <32 x i16>
1008  %1 = bitcast <8 x i64> %__b to <32 x i16>
1009  %2 = icmp eq <32 x i16> %0, %1
  ; Reinterpret the i32 mask as 32 i1 lanes and AND it with the compare result.
1010  %3 = bitcast i32 %__u to <32 x i1>
1011  %4 = and <32 x i1> %2, %3
  ; Indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
1012  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1013  %6 = bitcast <64 x i1> %5 to i64
1014  ret i64 %6
1015}
1016
; Masked 32 x i16 equality compare of %__a against a 512-bit vector loaded
; from %__b. %__u supplies one mask bit per lane; the masked 32-lane result is
; widened to 64 lanes and returned as an i64 bitmask.
1017define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
1018; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1019; VLX:       # %bb.0: # %entry
1020; VLX-NEXT:    kmovd %edi, %k1
1021; VLX-NEXT:    vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1022; VLX-NEXT:    kmovq %k0, %rax
1023; VLX-NEXT:    vzeroupper
1024; VLX-NEXT:    retq
1025;
1026; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027; NoVLX:       # %bb.0: # %entry
1028; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm1
1029; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
1030; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
1031; NoVLX-NEXT:    kmovw %k0, %eax
1032; NoVLX-NEXT:    andl %edi, %eax
1033; NoVLX-NEXT:    shrl $16, %edi
1034; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1035; NoVLX-NEXT:    vpcmpeqw 32(%rsi), %ymm0, %ymm0
1036; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
1037; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
1038; NoVLX-NEXT:    kmovw %k0, %ecx
1039; NoVLX-NEXT:    andl %edi, %ecx
1040; NoVLX-NEXT:    shll $16, %ecx
1041; NoVLX-NEXT:    movzwl %ax, %eax
1042; NoVLX-NEXT:    orl %ecx, %eax
1043; NoVLX-NEXT:    vzeroupper
1044; NoVLX-NEXT:    retq
1045entry:
1046  %0 = bitcast <8 x i64> %__a to <32 x i16>
1047  %load = load <8 x i64>, ptr %__b
1048  %1 = bitcast <8 x i64> %load to <32 x i16>
1049  %2 = icmp eq <32 x i16> %0, %1
  ; Reinterpret the i32 mask as 32 i1 lanes and AND it with the compare result.
1050  %3 = bitcast i32 %__u to <32 x i1>
1051  %4 = and <32 x i1> %2, %3
  ; Indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
1052  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1053  %6 = bitcast <64 x i1> %5 to i64
1054  ret i64 %6
1055}
1056
1057
; Unmasked 4 x i32 equality compare of two 128-bit register operands. The
; 4-lane i1 result is widened to 8 lanes and returned as an i8 bitmask.
; In the expected output for the +avx512f-only configuration the compare runs
; on zmm registers and a kshiftlw/kshiftrw pair by 12 keeps only the low 4 bits.
1058define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1059; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1060; VLX:       # %bb.0: # %entry
1061; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1062; VLX-NEXT:    kmovd %k0, %eax
1063; VLX-NEXT:    # kill: def $al killed $al killed $eax
1064; VLX-NEXT:    retq
1065;
1066; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1067; NoVLX:       # %bb.0: # %entry
1068; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1069; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1070; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1071; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1072; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1073; NoVLX-NEXT:    kmovw %k0, %eax
1074; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1075; NoVLX-NEXT:    vzeroupper
1076; NoVLX-NEXT:    retq
1077entry:
1078  %0 = bitcast <2 x i64> %__a to <4 x i32>
1079  %1 = bitcast <2 x i64> %__b to <4 x i32>
1080  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1081  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1082  %4 = bitcast <8 x i1> %3 to i8
1083  ret i8 %4
1084}
1085
; Unmasked 4 x i32 equality compare of %__a against a 128-bit vector loaded
; from %__b. The 4-lane i1 result is widened to 8 lanes and returned as an i8
; bitmask.
1086define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1087; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1088; VLX:       # %bb.0: # %entry
1089; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1090; VLX-NEXT:    kmovd %k0, %eax
1091; VLX-NEXT:    # kill: def $al killed $al killed $eax
1092; VLX-NEXT:    retq
1093;
1094; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1095; NoVLX:       # %bb.0: # %entry
1096; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1097; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1098; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1099; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1100; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1101; NoVLX-NEXT:    kmovw %k0, %eax
1102; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1103; NoVLX-NEXT:    vzeroupper
1104; NoVLX-NEXT:    retq
1105entry:
1106  %0 = bitcast <2 x i64> %__a to <4 x i32>
1107  %load = load <2 x i64>, ptr %__b
1108  %1 = bitcast <2 x i64> %load to <4 x i32>
1109  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1110  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1111  %4 = bitcast <8 x i1> %3 to i8
1112  ret i8 %4
1113}
1114
; Masked 4 x i32 equality compare of two 128-bit register operands. Only the
; low 4 bits of %__u take part in the mask; the masked 4-lane result is
; widened to 8 lanes and returned as an i8 bitmask.
1115define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1116; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1117; VLX:       # %bb.0: # %entry
1118; VLX-NEXT:    kmovd %edi, %k1
1119; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1120; VLX-NEXT:    kmovd %k0, %eax
1121; VLX-NEXT:    # kill: def $al killed $al killed $eax
1122; VLX-NEXT:    retq
1123;
1124; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125; NoVLX:       # %bb.0: # %entry
1126; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1127; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1128; NoVLX-NEXT:    kmovw %edi, %k1
1129; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1130; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1131; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1132; NoVLX-NEXT:    kmovw %k0, %eax
1133; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1134; NoVLX-NEXT:    vzeroupper
1135; NoVLX-NEXT:    retq
1136entry:
1137  %0 = bitcast <2 x i64> %__a to <4 x i32>
1138  %1 = bitcast <2 x i64> %__b to <4 x i32>
1139  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as 8 i1 lanes, then keep lanes 0-3 to match the 4-lane compare.
1140  %3 = bitcast i8 %__u to <8 x i1>
1141  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1142  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1143  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144  %6 = bitcast <8 x i1> %5 to i8
1145  ret i8 %6
1146}
1147
; Masked 4 x i32 equality compare of %__a against a 128-bit vector loaded from
; %__b. Only the low 4 bits of %__u take part in the mask; the masked 4-lane
; result is widened to 8 lanes and returned as an i8 bitmask.
1148define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1149; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1150; VLX:       # %bb.0: # %entry
1151; VLX-NEXT:    kmovd %edi, %k1
1152; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1153; VLX-NEXT:    kmovd %k0, %eax
1154; VLX-NEXT:    # kill: def $al killed $al killed $eax
1155; VLX-NEXT:    retq
1156;
1157; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158; NoVLX:       # %bb.0: # %entry
1159; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1160; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1161; NoVLX-NEXT:    kmovw %edi, %k1
1162; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1163; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1164; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1165; NoVLX-NEXT:    kmovw %k0, %eax
1166; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1167; NoVLX-NEXT:    vzeroupper
1168; NoVLX-NEXT:    retq
1169entry:
1170  %0 = bitcast <2 x i64> %__a to <4 x i32>
1171  %load = load <2 x i64>, ptr %__b
1172  %1 = bitcast <2 x i64> %load to <4 x i32>
1173  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as 8 i1 lanes, then keep lanes 0-3 to match the 4-lane compare.
1174  %3 = bitcast i8 %__u to <8 x i1>
1175  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1176  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1177  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1178  %6 = bitcast <8 x i1> %5 to i8
1179  ret i8 %6
1180}
1181
1182
; Unmasked 4 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all four lanes. The 4-lane i1 result is widened to 8
; lanes and returned as an i8 bitmask.
; The expected output uses a broadcast memory operand ({1to4} or {1to16}).
1183define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1184; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1185; VLX:       # %bb.0: # %entry
1186; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1187; VLX-NEXT:    kmovd %k0, %eax
1188; VLX-NEXT:    # kill: def $al killed $al killed $eax
1189; VLX-NEXT:    retq
1190;
1191; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1192; NoVLX:       # %bb.0: # %entry
1193; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1194; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1195; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1196; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1197; NoVLX-NEXT:    kmovw %k0, %eax
1198; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1199; NoVLX-NEXT:    vzeroupper
1200; NoVLX-NEXT:    retq
1201entry:
1202  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
1203  %load = load i32, ptr %__b
1204  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1205  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1206  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1207  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1208  %4 = bitcast <8 x i1> %3 to i8
1209  ret i8 %4
1210}
1211
; Masked 4 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all four lanes. Only the low 4 bits of %__u take part
; in the mask; the masked 4-lane result is widened to 8 lanes and returned as
; an i8 bitmask.
1212define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1213; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1214; VLX:       # %bb.0: # %entry
1215; VLX-NEXT:    kmovd %edi, %k1
1216; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1217; VLX-NEXT:    kmovd %k0, %eax
1218; VLX-NEXT:    # kill: def $al killed $al killed $eax
1219; VLX-NEXT:    retq
1220;
1221; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1222; NoVLX:       # %bb.0: # %entry
1223; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1224; NoVLX-NEXT:    kmovw %edi, %k1
1225; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1226; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1227; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1228; NoVLX-NEXT:    kmovw %k0, %eax
1229; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1230; NoVLX-NEXT:    vzeroupper
1231; NoVLX-NEXT:    retq
1232entry:
1233  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
1234  %load = load i32, ptr %__b
1235  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1236  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1237  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as 8 i1 lanes, then keep lanes 0-3 to match the 4-lane compare.
1238  %3 = bitcast i8 %__u to <8 x i1>
1239  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1240  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
1241  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1242  %6 = bitcast <8 x i1> %5 to i8
1243  ret i8 %6
1244}
1245
1246
; Unmasked 4 x i32 equality compare of two 128-bit register operands. The
; 4-lane i1 result is widened to 16 lanes and returned as an i16 bitmask.
1247define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1248; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1249; VLX:       # %bb.0: # %entry
1250; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1251; VLX-NEXT:    kmovd %k0, %eax
1252; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1253; VLX-NEXT:    retq
1254;
1255; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1256; NoVLX:       # %bb.0: # %entry
1257; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1258; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1259; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1260; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1261; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1262; NoVLX-NEXT:    kmovw %k0, %eax
1263; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1264; NoVLX-NEXT:    vzeroupper
1265; NoVLX-NEXT:    retq
1266entry:
1267  %0 = bitcast <2 x i64> %__a to <4 x i32>
1268  %1 = bitcast <2 x i64> %__b to <4 x i32>
1269  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
1270  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1271  %4 = bitcast <16 x i1> %3 to i16
1272  ret i16 %4
1273}
1274
; Unmasked 4 x i32 equality compare of %__a against a 128-bit vector loaded
; from %__b. The 4-lane i1 result is widened to 16 lanes and returned as an
; i16 bitmask.
1275define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1276; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1277; VLX:       # %bb.0: # %entry
1278; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1279; VLX-NEXT:    kmovd %k0, %eax
1280; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1281; VLX-NEXT:    retq
1282;
1283; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1284; NoVLX:       # %bb.0: # %entry
1285; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1286; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1287; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1288; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1289; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1290; NoVLX-NEXT:    kmovw %k0, %eax
1291; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1292; NoVLX-NEXT:    vzeroupper
1293; NoVLX-NEXT:    retq
1294entry:
1295  %0 = bitcast <2 x i64> %__a to <4 x i32>
1296  %load = load <2 x i64>, ptr %__b
1297  %1 = bitcast <2 x i64> %load to <4 x i32>
1298  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
1299  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1300  %4 = bitcast <16 x i1> %3 to i16
1301  ret i16 %4
1302}
1303
; Masked, register operands: the 4-lane i32 equality result is ANDed with the
; low four bits of %__u (lanes 0-3 of its <8 x i1> view), then widened to an
; i16 mask whose bits 4-15 are zero. Both runs are expected to fold the AND
; into a {%k1} write-mask on the compare.
1304define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1305; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1306; VLX:       # %bb.0: # %entry
1307; VLX-NEXT:    kmovd %edi, %k1
1308; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1309; VLX-NEXT:    kmovd %k0, %eax
1310; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1311; VLX-NEXT:    retq
1312;
1313; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1314; NoVLX:       # %bb.0: # %entry
1315; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1316; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1317; NoVLX-NEXT:    kmovw %edi, %k1
1318; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1319; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1320; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1321; NoVLX-NEXT:    kmovw %k0, %eax
1322; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1323; NoVLX-NEXT:    vzeroupper
1324; NoVLX-NEXT:    retq
1325entry:
1326  %0 = bitcast <2 x i64> %__a to <4 x i32>
1327  %1 = bitcast <2 x i64> %__b to <4 x i32>
1328  %2 = icmp eq <4 x i32> %0, %1
1329  %3 = bitcast i8 %__u to <8 x i1>
1330  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1331  %4 = and <4 x i1> %2, %extract.i
1332  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1333  %6 = bitcast <16 x i1> %5 to i16
1334  ret i16 %6
1335}
1336
; Masked, memory operand: the right-hand side is a full <2 x i64> load from
; %__b. The 4-lane i32 equality result is ANDed with the low four bits of
; %__u, then widened to an i16 mask whose bits 4-15 are zero.
1337define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1338; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1339; VLX:       # %bb.0: # %entry
1340; VLX-NEXT:    kmovd %edi, %k1
1341; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1342; VLX-NEXT:    kmovd %k0, %eax
1343; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1344; VLX-NEXT:    retq
1345;
1346; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1347; NoVLX:       # %bb.0: # %entry
1348; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1349; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1350; NoVLX-NEXT:    kmovw %edi, %k1
1351; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1352; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1353; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1354; NoVLX-NEXT:    kmovw %k0, %eax
1355; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1356; NoVLX-NEXT:    vzeroupper
1357; NoVLX-NEXT:    retq
1358entry:
1359  %0 = bitcast <2 x i64> %__a to <4 x i32>
1360  %load = load <2 x i64>, ptr %__b
1361  %1 = bitcast <2 x i64> %load to <4 x i32>
1362  %2 = icmp eq <4 x i32> %0, %1
1363  %3 = bitcast i8 %__u to <8 x i1>
1364  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1365  %4 = and <4 x i1> %2, %extract.i
1366  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1367  %6 = bitcast <16 x i1> %5 to i16
1368  ret i16 %6
1369}
1370
1371
; Unmasked, broadcast operand: a single i32 loaded from %__b is splatted to
; all four lanes (insertelement + all-zero shuffle mask) before the equality
; compare. The result is widened to an i16 mask whose bits 4-15 are zero.
; Expected output folds the splat into an embedded-broadcast memory operand
; ({1to4} at 128 bits, {1to16} at 512 bits).
1372define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1373; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1374; VLX:       # %bb.0: # %entry
1375; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1376; VLX-NEXT:    kmovd %k0, %eax
1377; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1378; VLX-NEXT:    retq
1379;
1380; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1381; NoVLX:       # %bb.0: # %entry
1382; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1383; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1384; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1385; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1386; NoVLX-NEXT:    kmovw %k0, %eax
1387; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1388; NoVLX-NEXT:    vzeroupper
1389; NoVLX-NEXT:    retq
1390entry:
1391  %0 = bitcast <2 x i64> %__a to <4 x i32>
1392  %load = load i32, ptr %__b
1393  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1394  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1395  %2 = icmp eq <4 x i32> %0, %1
1396  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1397  %4 = bitcast <16 x i1> %3 to i16
1398  ret i16 %4
1399}
1400
; Masked, broadcast operand: a single i32 loaded from %__b is splatted to all
; four lanes, compared for equality with %__a, and the result is ANDed with
; the low four bits of %__u (here the mask is the first AND operand). The
; result is widened to an i16 mask whose bits 4-15 are zero.
1401define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1402; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1403; VLX:       # %bb.0: # %entry
1404; VLX-NEXT:    kmovd %edi, %k1
1405; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1406; VLX-NEXT:    kmovd %k0, %eax
1407; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1408; VLX-NEXT:    retq
1409;
1410; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1411; NoVLX:       # %bb.0: # %entry
1412; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1413; NoVLX-NEXT:    kmovw %edi, %k1
1414; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1415; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1416; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1417; NoVLX-NEXT:    kmovw %k0, %eax
1418; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1419; NoVLX-NEXT:    vzeroupper
1420; NoVLX-NEXT:    retq
1421entry:
1422  %0 = bitcast <2 x i64> %__a to <4 x i32>
1423  %load = load i32, ptr %__b
1424  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1425  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1426  %2 = icmp eq <4 x i32> %0, %1
1427  %3 = bitcast i8 %__u to <8 x i1>
1428  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1429  %4 = and <4 x i1> %extract.i, %2
1430  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1431  %6 = bitcast <16 x i1> %5 to i16
1432  ret i16 %6
1433}
1434
1435
; Unmasked, register operands: compares the four i32 lanes of %__a and %__b
; for equality and returns the 4-bit result widened to an i32 mask. Shuffle
; indices 4-7 all pick lanes of the zeroinitializer operand, so bits 4-31 are 0.
; The 512-bit-only run is expected to compare in zmm and clear the 12 unused
; bits of the 16-bit mask with a kshiftlw/kshiftrw pair.
1436define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1437; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1438; VLX:       # %bb.0: # %entry
1439; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1440; VLX-NEXT:    kmovd %k0, %eax
1441; VLX-NEXT:    retq
1442;
1443; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1444; NoVLX:       # %bb.0: # %entry
1445; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1446; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1447; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1448; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1449; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1450; NoVLX-NEXT:    kmovw %k0, %eax
1451; NoVLX-NEXT:    vzeroupper
1452; NoVLX-NEXT:    retq
1453entry:
1454  %0 = bitcast <2 x i64> %__a to <4 x i32>
1455  %1 = bitcast <2 x i64> %__b to <4 x i32>
1456  %2 = icmp eq <4 x i32> %0, %1
1457  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1458  %4 = bitcast <32 x i1> %3 to i32
1459  ret i32 %4
1460}
1461
; Unmasked, memory operand: the right-hand side is a full <2 x i64> load from
; %__b. The 4-lane i32 equality result is widened to an i32 mask whose bits
; 4-31 are zero (shuffle indices 4-7 pick lanes of the zeroinitializer operand).
1462define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1463; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1464; VLX:       # %bb.0: # %entry
1465; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1466; VLX-NEXT:    kmovd %k0, %eax
1467; VLX-NEXT:    retq
1468;
1469; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1470; NoVLX:       # %bb.0: # %entry
1471; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1472; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1473; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1474; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1475; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1476; NoVLX-NEXT:    kmovw %k0, %eax
1477; NoVLX-NEXT:    vzeroupper
1478; NoVLX-NEXT:    retq
1479entry:
1480  %0 = bitcast <2 x i64> %__a to <4 x i32>
1481  %load = load <2 x i64>, ptr %__b
1482  %1 = bitcast <2 x i64> %load to <4 x i32>
1483  %2 = icmp eq <4 x i32> %0, %1
1484  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1485  %4 = bitcast <32 x i1> %3 to i32
1486  ret i32 %4
1487}
1488
; Masked, register operands: the 4-lane i32 equality result is ANDed with the
; low four bits of %__u (lanes 0-3 of its <8 x i1> view), then widened to an
; i32 mask whose bits 4-31 are zero. Both runs are expected to fold the AND
; into a {%k1} write-mask on the compare.
1489define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1490; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1491; VLX:       # %bb.0: # %entry
1492; VLX-NEXT:    kmovd %edi, %k1
1493; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1494; VLX-NEXT:    kmovd %k0, %eax
1495; VLX-NEXT:    retq
1496;
1497; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1498; NoVLX:       # %bb.0: # %entry
1499; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1500; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1501; NoVLX-NEXT:    kmovw %edi, %k1
1502; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1503; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1504; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1505; NoVLX-NEXT:    kmovw %k0, %eax
1506; NoVLX-NEXT:    vzeroupper
1507; NoVLX-NEXT:    retq
1508entry:
1509  %0 = bitcast <2 x i64> %__a to <4 x i32>
1510  %1 = bitcast <2 x i64> %__b to <4 x i32>
1511  %2 = icmp eq <4 x i32> %0, %1
1512  %3 = bitcast i8 %__u to <8 x i1>
1513  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1514  %4 = and <4 x i1> %2, %extract.i
1515  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1516  %6 = bitcast <32 x i1> %5 to i32
1517  ret i32 %6
1518}
1519
; Masked, memory operand: the right-hand side is a full <2 x i64> load from
; %__b. The 4-lane i32 equality result is ANDed with the low four bits of
; %__u, then widened to an i32 mask whose bits 4-31 are zero.
1520define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1521; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1522; VLX:       # %bb.0: # %entry
1523; VLX-NEXT:    kmovd %edi, %k1
1524; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1525; VLX-NEXT:    kmovd %k0, %eax
1526; VLX-NEXT:    retq
1527;
1528; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1529; NoVLX:       # %bb.0: # %entry
1530; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1531; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1532; NoVLX-NEXT:    kmovw %edi, %k1
1533; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1534; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1535; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1536; NoVLX-NEXT:    kmovw %k0, %eax
1537; NoVLX-NEXT:    vzeroupper
1538; NoVLX-NEXT:    retq
1539entry:
1540  %0 = bitcast <2 x i64> %__a to <4 x i32>
1541  %load = load <2 x i64>, ptr %__b
1542  %1 = bitcast <2 x i64> %load to <4 x i32>
1543  %2 = icmp eq <4 x i32> %0, %1
1544  %3 = bitcast i8 %__u to <8 x i1>
1545  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1546  %4 = and <4 x i1> %2, %extract.i
1547  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1548  %6 = bitcast <32 x i1> %5 to i32
1549  ret i32 %6
1550}
1551
1552
; Unmasked, broadcast operand: a single i32 loaded from %__b is splatted to
; all four lanes (insertelement + all-zero shuffle mask) before the equality
; compare. The result is widened to an i32 mask whose bits 4-31 are zero.
; Expected output folds the splat into an embedded-broadcast memory operand
; ({1to4} at 128 bits, {1to16} at 512 bits).
1553define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1554; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1555; VLX:       # %bb.0: # %entry
1556; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1557; VLX-NEXT:    kmovd %k0, %eax
1558; VLX-NEXT:    retq
1559;
1560; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1561; NoVLX:       # %bb.0: # %entry
1562; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1563; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1564; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1565; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1566; NoVLX-NEXT:    kmovw %k0, %eax
1567; NoVLX-NEXT:    vzeroupper
1568; NoVLX-NEXT:    retq
1569entry:
1570  %0 = bitcast <2 x i64> %__a to <4 x i32>
1571  %load = load i32, ptr %__b
1572  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1573  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1574  %2 = icmp eq <4 x i32> %0, %1
1575  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1576  %4 = bitcast <32 x i1> %3 to i32
1577  ret i32 %4
1578}
1579
; Masked, broadcast operand: a single i32 loaded from %__b is splatted to all
; four lanes, compared for equality with %__a, and the result is ANDed with
; the low four bits of %__u (here the mask is the first AND operand). The
; result is widened to an i32 mask whose bits 4-31 are zero.
1580define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1581; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1582; VLX:       # %bb.0: # %entry
1583; VLX-NEXT:    kmovd %edi, %k1
1584; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1585; VLX-NEXT:    kmovd %k0, %eax
1586; VLX-NEXT:    retq
1587;
1588; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1589; NoVLX:       # %bb.0: # %entry
1590; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1591; NoVLX-NEXT:    kmovw %edi, %k1
1592; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1593; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1594; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1595; NoVLX-NEXT:    kmovw %k0, %eax
1596; NoVLX-NEXT:    vzeroupper
1597; NoVLX-NEXT:    retq
1598entry:
1599  %0 = bitcast <2 x i64> %__a to <4 x i32>
1600  %load = load i32, ptr %__b
1601  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1602  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1603  %2 = icmp eq <4 x i32> %0, %1
1604  %3 = bitcast i8 %__u to <8 x i1>
1605  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1606  %4 = and <4 x i1> %extract.i, %2
1607  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1608  %6 = bitcast <32 x i1> %5 to i32
1609  ret i32 %6
1610}
1611
1612
; Masked, register operands, with the mask passed as a plain i32 (no zeroext
; on the argument or the return). Only lanes 0-3 of the <32 x i1> view of %__u
; are used. Unlike the shuffle-widened variants, the 4-lane result is bitcast
; to i4 and zero-extended to i32. Expected output reads the mask with a single
; kmovb when AVX512DQ is enabled, and with kmovw + andl $15 otherwise.
1613define i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_i32(i32 %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1614; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32:
1615; VLX:       # %bb.0: # %entry
1616; VLX-NEXT:    kmovd %edi, %k1
1617; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1618; VLX-NEXT:    kmovb %k0, %eax
1619; VLX-NEXT:    retq
1620;
1621; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32:
1622; NoVLX:       # %bb.0: # %entry
1623; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1624; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1625; NoVLX-NEXT:    kmovw %edi, %k1
1626; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1627; NoVLX-NEXT:    kmovw %k0, %eax
1628; NoVLX-NEXT:    andl $15, %eax
1629; NoVLX-NEXT:    vzeroupper
1630; NoVLX-NEXT:    retq
1631entry:
1632  %0 = bitcast <2 x i64> %__a to <4 x i32>
1633  %1 = bitcast <2 x i64> %__b to <4 x i32>
1634  %2 = icmp eq <4 x i32> %0, %1
1635  %3 = bitcast i32 %__u to <32 x i1>
1636  %extract.i = shufflevector <32 x i1> %3, <32 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1637  %4 = and <4 x i1> %2, %extract.i
1638  %5 = bitcast <4 x i1> %4 to i4
1639  %6 = zext i4 %5 to i32
1640  ret i32 %6
1641}
1642
1643
; Unmasked, register operands: compares the four i32 lanes of %__a and %__b
; for equality and returns the 4-bit result widened to an i64 mask. Shuffle
; indices 4-7 all pick lanes of the zeroinitializer operand, so bits 4-63 are 0.
; The 512-bit-only run is expected to compare in zmm, clear the 12 unused mask
; bits with a kshiftlw/kshiftrw pair, and return through a 32-bit kmovw.
1644define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1645; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1646; VLX:       # %bb.0: # %entry
1647; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1648; VLX-NEXT:    kmovq %k0, %rax
1649; VLX-NEXT:    retq
1650;
1651; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1652; NoVLX:       # %bb.0: # %entry
1653; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1654; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1655; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1656; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1657; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1658; NoVLX-NEXT:    kmovw %k0, %eax
1659; NoVLX-NEXT:    vzeroupper
1660; NoVLX-NEXT:    retq
1661entry:
1662  %0 = bitcast <2 x i64> %__a to <4 x i32>
1663  %1 = bitcast <2 x i64> %__b to <4 x i32>
1664  %2 = icmp eq <4 x i32> %0, %1
1665  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1666  %4 = bitcast <64 x i1> %3 to i64
1667  ret i64 %4
1668}
1669
; Unmasked, memory operand: the right-hand side is a full <2 x i64> load from
; %__b. The 4-lane i32 equality result is widened to an i64 mask whose bits
; 4-63 are zero (shuffle indices 4-7 pick lanes of the zeroinitializer operand).
1670define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1671; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1672; VLX:       # %bb.0: # %entry
1673; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1674; VLX-NEXT:    kmovq %k0, %rax
1675; VLX-NEXT:    retq
1676;
1677; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1678; NoVLX:       # %bb.0: # %entry
1679; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1680; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1681; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1682; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1683; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1684; NoVLX-NEXT:    kmovw %k0, %eax
1685; NoVLX-NEXT:    vzeroupper
1686; NoVLX-NEXT:    retq
1687entry:
1688  %0 = bitcast <2 x i64> %__a to <4 x i32>
1689  %load = load <2 x i64>, ptr %__b
1690  %1 = bitcast <2 x i64> %load to <4 x i32>
1691  %2 = icmp eq <4 x i32> %0, %1
1692  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1693  %4 = bitcast <64 x i1> %3 to i64
1694  ret i64 %4
1695}
1696
; Masked, register operands: the 4-lane i32 equality result is ANDed with the
; low four bits of %__u (lanes 0-3 of its <8 x i1> view), then widened to an
; i64 mask whose bits 4-63 are zero. Both runs are expected to fold the AND
; into a {%k1} write-mask on the compare.
1697define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1698; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1699; VLX:       # %bb.0: # %entry
1700; VLX-NEXT:    kmovd %edi, %k1
1701; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1702; VLX-NEXT:    kmovq %k0, %rax
1703; VLX-NEXT:    retq
1704;
1705; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1706; NoVLX:       # %bb.0: # %entry
1707; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1708; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1709; NoVLX-NEXT:    kmovw %edi, %k1
1710; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1711; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1712; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1713; NoVLX-NEXT:    kmovw %k0, %eax
1714; NoVLX-NEXT:    vzeroupper
1715; NoVLX-NEXT:    retq
1716entry:
1717  %0 = bitcast <2 x i64> %__a to <4 x i32>
1718  %1 = bitcast <2 x i64> %__b to <4 x i32>
1719  %2 = icmp eq <4 x i32> %0, %1
1720  %3 = bitcast i8 %__u to <8 x i1>
1721  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1722  %4 = and <4 x i1> %2, %extract.i
1723  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1724  %6 = bitcast <64 x i1> %5 to i64
1725  ret i64 %6
1726}
1727
; Masked, memory operand: the right-hand side is a full <2 x i64> load from
; %__b. The 4-lane i32 equality result is ANDed with the low four bits of
; %__u, then widened to an i64 mask whose bits 4-63 are zero.
1728define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1729; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1730; VLX:       # %bb.0: # %entry
1731; VLX-NEXT:    kmovd %edi, %k1
1732; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1733; VLX-NEXT:    kmovq %k0, %rax
1734; VLX-NEXT:    retq
1735;
1736; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1737; NoVLX:       # %bb.0: # %entry
1738; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1739; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1740; NoVLX-NEXT:    kmovw %edi, %k1
1741; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1742; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1743; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1744; NoVLX-NEXT:    kmovw %k0, %eax
1745; NoVLX-NEXT:    vzeroupper
1746; NoVLX-NEXT:    retq
1747entry:
1748  %0 = bitcast <2 x i64> %__a to <4 x i32>
1749  %load = load <2 x i64>, ptr %__b
1750  %1 = bitcast <2 x i64> %load to <4 x i32>
1751  %2 = icmp eq <4 x i32> %0, %1
1752  %3 = bitcast i8 %__u to <8 x i1>
1753  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1754  %4 = and <4 x i1> %2, %extract.i
1755  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1756  %6 = bitcast <64 x i1> %5 to i64
1757  ret i64 %6
1758}
1759
1760
; Unmasked, broadcast operand: a single i32 loaded from %__b is splatted to
; all four lanes (insertelement + all-zero shuffle mask) before the equality
; compare. The result is widened to an i64 mask whose bits 4-63 are zero.
; Expected output folds the splat into an embedded-broadcast memory operand
; ({1to4} at 128 bits, {1to16} at 512 bits).
1761define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1762; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1763; VLX:       # %bb.0: # %entry
1764; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1765; VLX-NEXT:    kmovq %k0, %rax
1766; VLX-NEXT:    retq
1767;
1768; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1769; NoVLX:       # %bb.0: # %entry
1770; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1771; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1772; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1773; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1774; NoVLX-NEXT:    kmovw %k0, %eax
1775; NoVLX-NEXT:    vzeroupper
1776; NoVLX-NEXT:    retq
1777entry:
1778  %0 = bitcast <2 x i64> %__a to <4 x i32>
1779  %load = load i32, ptr %__b
1780  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1781  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1782  %2 = icmp eq <4 x i32> %0, %1
1783  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1784  %4 = bitcast <64 x i1> %3 to i64
1785  ret i64 %4
1786}
1787
; Masked, broadcast operand: a single i32 loaded from %__b is splatted to all
; four lanes, compared for equality with %__a, and the result is ANDed with
; the low four bits of %__u (here the mask is the first AND operand). The
; result is widened to an i64 mask whose bits 4-63 are zero.
1788define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1789; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1790; VLX:       # %bb.0: # %entry
1791; VLX-NEXT:    kmovd %edi, %k1
1792; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1793; VLX-NEXT:    kmovq %k0, %rax
1794; VLX-NEXT:    retq
1795;
1796; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1797; NoVLX:       # %bb.0: # %entry
1798; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1799; NoVLX-NEXT:    kmovw %edi, %k1
1800; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1801; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1802; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1803; NoVLX-NEXT:    kmovw %k0, %eax
1804; NoVLX-NEXT:    vzeroupper
1805; NoVLX-NEXT:    retq
1806entry:
1807  %0 = bitcast <2 x i64> %__a to <4 x i32>
1808  %load = load i32, ptr %__b
1809  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1810  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1811  %2 = icmp eq <4 x i32> %0, %1
1812  %3 = bitcast i8 %__u to <8 x i1>
1813  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1814  %4 = and <4 x i1> %extract.i, %2
1815  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1816  %6 = bitcast <64 x i1> %5 to i64
1817  ret i64 %6
1818}
1819
1820
; Unmasked, register operands, 256-bit inputs: compares the eight i32 lanes of
; %__a and %__b for equality and returns the 8-bit result widened to an i16
; mask. Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so
; bits 8-15 are zero. The 512-bit-only run is expected to compare in zmm and
; clear the 8 unused mask bits with a kshiftlw/kshiftrw pair.
1821define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1822; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1823; VLX:       # %bb.0: # %entry
1824; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
1825; VLX-NEXT:    kmovd %k0, %eax
1826; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1827; VLX-NEXT:    vzeroupper
1828; VLX-NEXT:    retq
1829;
1830; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1831; NoVLX:       # %bb.0: # %entry
1832; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1833; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1834; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1835; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1836; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1837; NoVLX-NEXT:    kmovw %k0, %eax
1838; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1839; NoVLX-NEXT:    vzeroupper
1840; NoVLX-NEXT:    retq
1841entry:
1842  %0 = bitcast <4 x i64> %__a to <8 x i32>
1843  %1 = bitcast <4 x i64> %__b to <8 x i32>
1844  %2 = icmp eq <8 x i32> %0, %1
1845  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1846  %4 = bitcast <16 x i1> %3 to i16
1847  ret i16 %4
1848}
1849
; Unmasked, memory operand, 256-bit inputs: the right-hand side is a full
; <4 x i64> load from %__b. The 8-lane i32 equality result is widened to an
; i16 mask whose bits 8-15 are zero (shuffle indices 8-15 pick lanes of the
; zeroinitializer operand).
1850define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
1851; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1852; VLX:       # %bb.0: # %entry
1853; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
1854; VLX-NEXT:    kmovd %k0, %eax
1855; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1856; VLX-NEXT:    vzeroupper
1857; VLX-NEXT:    retq
1858;
1859; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1860; NoVLX:       # %bb.0: # %entry
1861; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1862; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
1863; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1864; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1865; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1866; NoVLX-NEXT:    kmovw %k0, %eax
1867; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1868; NoVLX-NEXT:    vzeroupper
1869; NoVLX-NEXT:    retq
1870entry:
1871  %0 = bitcast <4 x i64> %__a to <8 x i32>
1872  %load = load <4 x i64>, ptr %__b
1873  %1 = bitcast <4 x i64> %load to <8 x i32>
1874  %2 = icmp eq <8 x i32> %0, %1
1875  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1876  %4 = bitcast <16 x i1> %3 to i16
1877  ret i16 %4
1878}
1879
; Masked, register operands, 256-bit inputs: the 8-lane i32 equality result is
; ANDed with all eight bits of %__u (bitcast directly to <8 x i1>), then
; widened to an i16 mask whose bits 8-15 are zero. Both runs are expected to
; fold the AND into a {%k1} write-mask on the compare.
1880define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1881; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1882; VLX:       # %bb.0: # %entry
1883; VLX-NEXT:    kmovd %edi, %k1
1884; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1885; VLX-NEXT:    kmovd %k0, %eax
1886; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1887; VLX-NEXT:    vzeroupper
1888; VLX-NEXT:    retq
1889;
1890; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1891; NoVLX:       # %bb.0: # %entry
1892; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1893; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1894; NoVLX-NEXT:    kmovw %edi, %k1
1895; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1896; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1897; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1898; NoVLX-NEXT:    kmovw %k0, %eax
1899; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1900; NoVLX-NEXT:    vzeroupper
1901; NoVLX-NEXT:    retq
1902entry:
1903  %0 = bitcast <4 x i64> %__a to <8 x i32>
1904  %1 = bitcast <4 x i64> %__b to <8 x i32>
1905  %2 = icmp eq <8 x i32> %0, %1
1906  %3 = bitcast i8 %__u to <8 x i1>
1907  %4 = and <8 x i1> %2, %3
1908  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1909  %6 = bitcast <16 x i1> %5 to i16
1910  ret i16 %6
1911}
1912
; Masked, memory operand, 256-bit inputs: the right-hand side is a full
; <4 x i64> load from %__b. The 8-lane i32 equality result is ANDed with all
; eight bits of %__u, then widened to an i16 mask whose bits 8-15 are zero.
1913define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
1914; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1915; VLX:       # %bb.0: # %entry
1916; VLX-NEXT:    kmovd %edi, %k1
1917; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1918; VLX-NEXT:    kmovd %k0, %eax
1919; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1920; VLX-NEXT:    vzeroupper
1921; VLX-NEXT:    retq
1922;
1923; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1924; NoVLX:       # %bb.0: # %entry
1925; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1926; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
1927; NoVLX-NEXT:    kmovw %edi, %k1
1928; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1929; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1930; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1931; NoVLX-NEXT:    kmovw %k0, %eax
1932; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1933; NoVLX-NEXT:    vzeroupper
1934; NoVLX-NEXT:    retq
1935entry:
1936  %0 = bitcast <4 x i64> %__a to <8 x i32>
1937  %load = load <4 x i64>, ptr %__b
1938  %1 = bitcast <4 x i64> %load to <8 x i32>
1939  %2 = icmp eq <8 x i32> %0, %1
1940  %3 = bitcast i8 %__u to <8 x i1>
1941  %4 = and <8 x i1> %2, %3
1942  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1943  %6 = bitcast <16 x i1> %5 to i16
1944  ret i16 %6
1945}
1946
1947
; Unmasked equality compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all lanes. The 8-bit result is widened
; to 16 bits with zero fill and returned as i16.
; Expected codegen: both runs fold the splat into a broadcast memory operand
; ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the +avx512f-only
; run additionally clears mask bits 8-15 with kshiftlw/kshiftrw.
1948define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
1949; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1950; VLX:       # %bb.0: # %entry
1951; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1952; VLX-NEXT:    kmovd %k0, %eax
1953; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1954; VLX-NEXT:    vzeroupper
1955; VLX-NEXT:    retq
1956;
1957; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1958; NoVLX:       # %bb.0: # %entry
1959; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1960; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1961; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1962; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1963; NoVLX-NEXT:    kmovw %k0, %eax
1964; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1965; NoVLX-NEXT:    vzeroupper
1966; NoVLX-NEXT:    retq
1967entry:
1968  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
1969  %load = load i32, ptr %__b
1970  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1971  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1972  %2 = icmp eq <8 x i32> %0, %1
  ; Indices 8-15 select lanes of the zeroinitializer operand, so result
  ; bits 8-15 are zero.
1973  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1974  %4 = bitcast <16 x i1> %3 to i16
1975  ret i16 %4
1976}
1977
; Masked equality compare of the eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes, ANDed with the bits of %__u.
; The 8-bit result is widened to 16 bits with zero fill and returned as i16.
; Expected codegen: both runs use a masked compare with a broadcast memory
; operand ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the
; +avx512f-only run additionally clears mask bits 8-15 with kshiftlw/kshiftrw.
1978define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
1979; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1980; VLX:       # %bb.0: # %entry
1981; VLX-NEXT:    kmovd %edi, %k1
1982; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1983; VLX-NEXT:    kmovd %k0, %eax
1984; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1985; VLX-NEXT:    vzeroupper
1986; VLX-NEXT:    retq
1987;
1988; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1989; NoVLX:       # %bb.0: # %entry
1990; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1991; NoVLX-NEXT:    kmovw %edi, %k1
1992; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1993; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1994; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1995; NoVLX-NEXT:    kmovw %k0, %eax
1996; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1997; NoVLX-NEXT:    vzeroupper
1998; NoVLX-NEXT:    retq
1999entry:
2000  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2001  %load = load i32, ptr %__b
2002  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2003  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2004  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it (mask is the
  ; first operand of the and in this variant).
2005  %3 = bitcast i8 %__u to <8 x i1>
2006  %4 = and <8 x i1> %3, %2
  ; Indices 8-15 select lanes of the zeroinitializer operand, so result
  ; bits 8-15 are zero.
2007  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2008  %6 = bitcast <16 x i1> %5 to i16
2009  ret i16 %6
2010}
2011
2012
; Unmasked register-register equality compare of eight i32 lanes
; (%__a vs %__b). The 8-bit result is widened to 32 bits with zero fill and
; returned as i32.
; Expected codegen: the +avx512vl run compares on ymm; the +avx512f-only run
; compares on zmm and clears mask bits 8-15 with kshiftlw/kshiftrw.
2013define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2014; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2015; VLX:       # %bb.0: # %entry
2016; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
2017; VLX-NEXT:    kmovd %k0, %eax
2018; VLX-NEXT:    vzeroupper
2019; VLX-NEXT:    retq
2020;
2021; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2022; NoVLX:       # %bb.0: # %entry
2023; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2024; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2025; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2026; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2027; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2028; NoVLX-NEXT:    kmovw %k0, %eax
2029; NoVLX-NEXT:    vzeroupper
2030; NoVLX-NEXT:    retq
2031entry:
2032  %0 = bitcast <4 x i64> %__a to <8 x i32>
2033  %1 = bitcast <4 x i64> %__b to <8 x i32>
2034  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2035  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2036  %4 = bitcast <32 x i1> %3 to i32
2037  ret i32 %4
2038}
2039
; Unmasked equality compare of eight i32 lanes: %__a against a 256-bit vector
; loaded from %__b. The 8-bit result is widened to 32 bits with zero fill and
; returned as i32.
; Expected codegen: the +avx512vl run folds the load into a ymm compare; the
; +avx512f-only run loads the operand separately, compares on zmm, and clears
; mask bits 8-15 with kshiftlw/kshiftrw.
2040define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2041; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2042; VLX:       # %bb.0: # %entry
2043; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
2044; VLX-NEXT:    kmovd %k0, %eax
2045; VLX-NEXT:    vzeroupper
2046; VLX-NEXT:    retq
2047;
2048; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2049; NoVLX:       # %bb.0: # %entry
2050; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2051; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
2052; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2053; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2054; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2055; NoVLX-NEXT:    kmovw %k0, %eax
2056; NoVLX-NEXT:    vzeroupper
2057; NoVLX-NEXT:    retq
2058entry:
2059  %0 = bitcast <4 x i64> %__a to <8 x i32>
2060  %load = load <4 x i64>, ptr %__b
2061  %1 = bitcast <4 x i64> %load to <8 x i32>
2062  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2063  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2064  %4 = bitcast <32 x i1> %3 to i32
2065  ret i32 %4
2066}
2067
; Masked register-register equality compare of eight i32 lanes
; (%__a vs %__b), ANDed with the bits of %__u. The 8-bit result is widened to
; 32 bits with zero fill and returned as i32.
; Expected codegen: the +avx512vl run uses a masked ymm compare; the
; +avx512f-only run uses a masked zmm compare and clears mask bits 8-15 with
; kshiftlw/kshiftrw.
2068define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2069; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2070; VLX:       # %bb.0: # %entry
2071; VLX-NEXT:    kmovd %edi, %k1
2072; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2073; VLX-NEXT:    kmovd %k0, %eax
2074; VLX-NEXT:    vzeroupper
2075; VLX-NEXT:    retq
2076;
2077; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2078; NoVLX:       # %bb.0: # %entry
2079; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2080; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2081; NoVLX-NEXT:    kmovw %edi, %k1
2082; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2083; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2084; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2085; NoVLX-NEXT:    kmovw %k0, %eax
2086; NoVLX-NEXT:    vzeroupper
2087; NoVLX-NEXT:    retq
2088entry:
2089  %0 = bitcast <4 x i64> %__a to <8 x i32>
2090  %1 = bitcast <4 x i64> %__b to <8 x i32>
2091  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2092  %3 = bitcast i8 %__u to <8 x i1>
2093  %4 = and <8 x i1> %2, %3
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2094  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2095  %6 = bitcast <32 x i1> %5 to i32
2096  ret i32 %6
2097}
2098
; Masked equality compare of eight i32 lanes: %__a against a 256-bit vector
; loaded from %__b, ANDed with the bits of %__u. The 8-bit result is widened
; to 32 bits with zero fill and returned as i32.
; Expected codegen: the +avx512vl run folds the load into a masked ymm
; compare; the +avx512f-only run loads the operand separately, uses a masked
; zmm compare, and clears mask bits 8-15 with kshiftlw/kshiftrw.
2099define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2100; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2101; VLX:       # %bb.0: # %entry
2102; VLX-NEXT:    kmovd %edi, %k1
2103; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2104; VLX-NEXT:    kmovd %k0, %eax
2105; VLX-NEXT:    vzeroupper
2106; VLX-NEXT:    retq
2107;
2108; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2109; NoVLX:       # %bb.0: # %entry
2110; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2111; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
2112; NoVLX-NEXT:    kmovw %edi, %k1
2113; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2114; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2115; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2116; NoVLX-NEXT:    kmovw %k0, %eax
2117; NoVLX-NEXT:    vzeroupper
2118; NoVLX-NEXT:    retq
2119entry:
2120  %0 = bitcast <4 x i64> %__a to <8 x i32>
2121  %load = load <4 x i64>, ptr %__b
2122  %1 = bitcast <4 x i64> %load to <8 x i32>
2123  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2124  %3 = bitcast i8 %__u to <8 x i1>
2125  %4 = and <8 x i1> %2, %3
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2126  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2127  %6 = bitcast <32 x i1> %5 to i32
2128  ret i32 %6
2129}
2130
2131
; Unmasked equality compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all lanes. The 8-bit result is widened
; to 32 bits with zero fill and returned as i32.
; Expected codegen: both runs fold the splat into a broadcast memory operand
; ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the +avx512f-only
; run additionally clears mask bits 8-15 with kshiftlw/kshiftrw.
2132define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2133; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2134; VLX:       # %bb.0: # %entry
2135; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2136; VLX-NEXT:    kmovd %k0, %eax
2137; VLX-NEXT:    vzeroupper
2138; VLX-NEXT:    retq
2139;
2140; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2141; NoVLX:       # %bb.0: # %entry
2142; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2143; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2144; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2145; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2146; NoVLX-NEXT:    kmovw %k0, %eax
2147; NoVLX-NEXT:    vzeroupper
2148; NoVLX-NEXT:    retq
2149entry:
2150  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2151  %load = load i32, ptr %__b
2152  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2153  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2154  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2155  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2156  %4 = bitcast <32 x i1> %3 to i32
2157  ret i32 %4
2158}
2159
; Masked equality compare of the eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes, ANDed with the bits of %__u.
; The 8-bit result is widened to 32 bits with zero fill and returned as i32.
; Expected codegen: both runs use a masked compare with a broadcast memory
; operand ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the
; +avx512f-only run additionally clears mask bits 8-15 with kshiftlw/kshiftrw.
2160define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2161; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2162; VLX:       # %bb.0: # %entry
2163; VLX-NEXT:    kmovd %edi, %k1
2164; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2165; VLX-NEXT:    kmovd %k0, %eax
2166; VLX-NEXT:    vzeroupper
2167; VLX-NEXT:    retq
2168;
2169; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2170; NoVLX:       # %bb.0: # %entry
2171; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2172; NoVLX-NEXT:    kmovw %edi, %k1
2173; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2174; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2175; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2176; NoVLX-NEXT:    kmovw %k0, %eax
2177; NoVLX-NEXT:    vzeroupper
2178; NoVLX-NEXT:    retq
2179entry:
2180  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2181  %load = load i32, ptr %__b
2182  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2183  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2184  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it (mask is the
  ; first operand of the and in this variant).
2185  %3 = bitcast i8 %__u to <8 x i1>
2186  %4 = and <8 x i1> %3, %2
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-31 are zero.
2187  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2188  %6 = bitcast <32 x i1> %5 to i32
2189  ret i32 %6
2190}
2191
2192
; Unmasked register-register equality compare of eight i32 lanes
; (%__a vs %__b). The 8-bit result is widened to 64 bits with zero fill and
; returned as i64.
; Expected codegen: the +avx512vl run compares on ymm and reads the mask with
; kmovq; the +avx512f-only run compares on zmm, clears mask bits 8-15 with
; kshiftlw/kshiftrw, and reads the mask with kmovw into eax.
2193define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2194; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2195; VLX:       # %bb.0: # %entry
2196; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
2197; VLX-NEXT:    kmovq %k0, %rax
2198; VLX-NEXT:    vzeroupper
2199; VLX-NEXT:    retq
2200;
2201; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2202; NoVLX:       # %bb.0: # %entry
2203; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2204; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2205; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2206; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2207; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2208; NoVLX-NEXT:    kmovw %k0, %eax
2209; NoVLX-NEXT:    vzeroupper
2210; NoVLX-NEXT:    retq
2211entry:
2212  %0 = bitcast <4 x i64> %__a to <8 x i32>
2213  %1 = bitcast <4 x i64> %__b to <8 x i32>
2214  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2215  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2216  %4 = bitcast <64 x i1> %3 to i64
2217  ret i64 %4
2218}
2219
; Unmasked equality compare of eight i32 lanes: %__a against a 256-bit vector
; loaded from %__b. The 8-bit result is widened to 64 bits with zero fill and
; returned as i64.
; Expected codegen: the +avx512vl run folds the load into a ymm compare and
; reads the mask with kmovq; the +avx512f-only run loads the operand
; separately, compares on zmm, clears mask bits 8-15 with kshiftlw/kshiftrw,
; and reads the mask with kmovw into eax.
2220define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2221; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2222; VLX:       # %bb.0: # %entry
2223; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
2224; VLX-NEXT:    kmovq %k0, %rax
2225; VLX-NEXT:    vzeroupper
2226; VLX-NEXT:    retq
2227;
2228; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2229; NoVLX:       # %bb.0: # %entry
2230; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2231; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
2232; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2233; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2234; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2235; NoVLX-NEXT:    kmovw %k0, %eax
2236; NoVLX-NEXT:    vzeroupper
2237; NoVLX-NEXT:    retq
2238entry:
2239  %0 = bitcast <4 x i64> %__a to <8 x i32>
2240  %load = load <4 x i64>, ptr %__b
2241  %1 = bitcast <4 x i64> %load to <8 x i32>
2242  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2243  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2244  %4 = bitcast <64 x i1> %3 to i64
2245  ret i64 %4
2246}
2247
; Masked register-register equality compare of eight i32 lanes
; (%__a vs %__b), ANDed with the bits of %__u. The 8-bit result is widened to
; 64 bits with zero fill and returned as i64.
; Expected codegen: the +avx512vl run uses a masked ymm compare and reads the
; mask with kmovq; the +avx512f-only run uses a masked zmm compare, clears
; mask bits 8-15 with kshiftlw/kshiftrw, and reads the mask with kmovw.
2248define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2249; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2250; VLX:       # %bb.0: # %entry
2251; VLX-NEXT:    kmovd %edi, %k1
2252; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2253; VLX-NEXT:    kmovq %k0, %rax
2254; VLX-NEXT:    vzeroupper
2255; VLX-NEXT:    retq
2256;
2257; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2258; NoVLX:       # %bb.0: # %entry
2259; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2260; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2261; NoVLX-NEXT:    kmovw %edi, %k1
2262; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2263; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2264; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2265; NoVLX-NEXT:    kmovw %k0, %eax
2266; NoVLX-NEXT:    vzeroupper
2267; NoVLX-NEXT:    retq
2268entry:
2269  %0 = bitcast <4 x i64> %__a to <8 x i32>
2270  %1 = bitcast <4 x i64> %__b to <8 x i32>
2271  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2272  %3 = bitcast i8 %__u to <8 x i1>
2273  %4 = and <8 x i1> %2, %3
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2274  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2275  %6 = bitcast <64 x i1> %5 to i64
2276  ret i64 %6
2277}
2278
; Masked equality compare of eight i32 lanes: %__a against a 256-bit vector
; loaded from %__b, ANDed with the bits of %__u. The 8-bit result is widened
; to 64 bits with zero fill and returned as i64.
; Expected codegen: the +avx512vl run folds the load into a masked ymm compare
; and reads the mask with kmovq; the +avx512f-only run loads the operand
; separately, uses a masked zmm compare, clears mask bits 8-15 with
; kshiftlw/kshiftrw, and reads the mask with kmovw.
2279define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2280; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2281; VLX:       # %bb.0: # %entry
2282; VLX-NEXT:    kmovd %edi, %k1
2283; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2284; VLX-NEXT:    kmovq %k0, %rax
2285; VLX-NEXT:    vzeroupper
2286; VLX-NEXT:    retq
2287;
2288; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2289; NoVLX:       # %bb.0: # %entry
2290; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2291; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
2292; NoVLX-NEXT:    kmovw %edi, %k1
2293; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2294; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2295; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2296; NoVLX-NEXT:    kmovw %k0, %eax
2297; NoVLX-NEXT:    vzeroupper
2298; NoVLX-NEXT:    retq
2299entry:
2300  %0 = bitcast <4 x i64> %__a to <8 x i32>
2301  %load = load <4 x i64>, ptr %__b
2302  %1 = bitcast <4 x i64> %load to <8 x i32>
2303  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2304  %3 = bitcast i8 %__u to <8 x i1>
2305  %4 = and <8 x i1> %2, %3
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2306  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2307  %6 = bitcast <64 x i1> %5 to i64
2308  ret i64 %6
2309}
2310
2311
; Unmasked equality compare of the eight i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all lanes. The 8-bit result is widened
; to 64 bits with zero fill and returned as i64.
; Expected codegen: both runs fold the splat into a broadcast memory operand
; ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the +avx512f-only
; run additionally clears mask bits 8-15 with kshiftlw/kshiftrw and reads the
; mask with kmovw instead of kmovq.
2312define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2313; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2314; VLX:       # %bb.0: # %entry
2315; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2316; VLX-NEXT:    kmovq %k0, %rax
2317; VLX-NEXT:    vzeroupper
2318; VLX-NEXT:    retq
2319;
2320; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2321; NoVLX:       # %bb.0: # %entry
2322; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2323; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2324; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2325; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2326; NoVLX-NEXT:    kmovw %k0, %eax
2327; NoVLX-NEXT:    vzeroupper
2328; NoVLX-NEXT:    retq
2329entry:
2330  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2331  %load = load i32, ptr %__b
2332  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2333  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2334  %2 = icmp eq <8 x i32> %0, %1
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2335  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2336  %4 = bitcast <64 x i1> %3 to i64
2337  ret i64 %4
2338}
2339
; Masked equality compare of the eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes, ANDed with the bits of %__u.
; The 8-bit result is widened to 64 bits with zero fill and returned as i64.
; Expected codegen: both runs use a masked compare with a broadcast memory
; operand ({1to8} on ymm with +avx512vl, {1to16} on zmm otherwise); the
; +avx512f-only run additionally clears mask bits 8-15 with kshiftlw/kshiftrw
; and reads the mask with kmovw instead of kmovq.
2340define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2341; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2342; VLX:       # %bb.0: # %entry
2343; VLX-NEXT:    kmovd %edi, %k1
2344; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2345; VLX-NEXT:    kmovq %k0, %rax
2346; VLX-NEXT:    vzeroupper
2347; VLX-NEXT:    retq
2348;
2349; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2350; NoVLX:       # %bb.0: # %entry
2351; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2352; NoVLX-NEXT:    kmovw %edi, %k1
2353; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2354; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2355; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2356; NoVLX-NEXT:    kmovw %k0, %eax
2357; NoVLX-NEXT:    vzeroupper
2358; NoVLX-NEXT:    retq
2359entry:
2360  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2361  %load = load i32, ptr %__b
2362  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2363  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2364  %2 = icmp eq <8 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it (mask is the
  ; first operand of the and in this variant).
2365  %3 = bitcast i8 %__u to <8 x i1>
2366  %4 = and <8 x i1> %3, %2
  ; Every index from position 8 onward is in 8-15, i.e. a lane of the
  ; zeroinitializer operand, so result bits 8-63 are zero.
2367  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2368  %6 = bitcast <64 x i1> %5 to i64
2369  ret i64 %6
2370}
2371
2372
; Unmasked register-register equality compare of sixteen i32 lanes
; (%__a vs %__b). The 16-bit result is widened to 32 bits with zero fill and
; returned as i32.
; Expected codegen: both runs compare on zmm directly; they differ only in
; the mask move (kmovd with the full feature set, kmovw with +avx512f only).
2373define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2374; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2375; VLX:       # %bb.0: # %entry
2376; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2377; VLX-NEXT:    kmovd %k0, %eax
2378; VLX-NEXT:    vzeroupper
2379; VLX-NEXT:    retq
2380;
2381; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2382; NoVLX:       # %bb.0: # %entry
2383; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2384; NoVLX-NEXT:    kmovw %k0, %eax
2385; NoVLX-NEXT:    vzeroupper
2386; NoVLX-NEXT:    retq
2387entry:
2388  %0 = bitcast <8 x i64> %__a to <16 x i32>
2389  %1 = bitcast <8 x i64> %__b to <16 x i32>
2390  %2 = icmp eq <16 x i32> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2391  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2392  %4 = bitcast <32 x i1> %3 to i32
2393  ret i32 %4
2394}
2395
; Unmasked equality compare of sixteen i32 lanes: %__a against a 512-bit
; vector loaded from %__b. The 16-bit result is widened to 32 bits with zero
; fill and returned as i32.
; Expected codegen: both runs fold the load into a zmm compare; they differ
; only in the mask move (kmovd with the full feature set, kmovw with
; +avx512f only).
2396define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2397; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2398; VLX:       # %bb.0: # %entry
2399; VLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2400; VLX-NEXT:    kmovd %k0, %eax
2401; VLX-NEXT:    vzeroupper
2402; VLX-NEXT:    retq
2403;
2404; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2405; NoVLX:       # %bb.0: # %entry
2406; NoVLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2407; NoVLX-NEXT:    kmovw %k0, %eax
2408; NoVLX-NEXT:    vzeroupper
2409; NoVLX-NEXT:    retq
2410entry:
2411  %0 = bitcast <8 x i64> %__a to <16 x i32>
2412  %load = load <8 x i64>, ptr %__b
2413  %1 = bitcast <8 x i64> %load to <16 x i32>
2414  %2 = icmp eq <16 x i32> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2415  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2416  %4 = bitcast <32 x i1> %3 to i32
2417  ret i32 %4
2418}
2419
; Masked register-register equality compare of sixteen i32 lanes
; (%__a vs %__b), ANDed with the bits of %__u. The 16-bit result is widened
; to 32 bits with zero fill and returned as i32.
; Expected codegen: the full-feature run applies the mask through %k1 on the
; compare itself; the +avx512f-only run compares unmasked and applies the
; mask afterwards with a scalar andl on eax.
2420define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2421; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2422; VLX:       # %bb.0: # %entry
2423; VLX-NEXT:    kmovd %edi, %k1
2424; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2425; VLX-NEXT:    kmovd %k0, %eax
2426; VLX-NEXT:    vzeroupper
2427; VLX-NEXT:    retq
2428;
2429; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2430; NoVLX:       # %bb.0: # %entry
2431; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2432; NoVLX-NEXT:    kmovw %k0, %eax
2433; NoVLX-NEXT:    andl %edi, %eax
2434; NoVLX-NEXT:    vzeroupper
2435; NoVLX-NEXT:    retq
2436entry:
2437  %0 = bitcast <8 x i64> %__a to <16 x i32>
2438  %1 = bitcast <8 x i64> %__b to <16 x i32>
2439  %2 = icmp eq <16 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2440  %3 = bitcast i16 %__u to <16 x i1>
2441  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2442  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2443  %6 = bitcast <32 x i1> %5 to i32
2444  ret i32 %6
2445}
2446
; Masked equality compare of sixteen i32 lanes: %__a against a 512-bit vector
; loaded from %__b, ANDed with the bits of %__u. The 16-bit result is widened
; to 32 bits with zero fill and returned as i32.
; Expected codegen: both runs fold the load into the zmm compare; the
; full-feature run applies the mask through %k1, while the +avx512f-only run
; compares unmasked and applies the mask with a scalar andl on eax.
2447define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2448; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2449; VLX:       # %bb.0: # %entry
2450; VLX-NEXT:    kmovd %edi, %k1
2451; VLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2452; VLX-NEXT:    kmovd %k0, %eax
2453; VLX-NEXT:    vzeroupper
2454; VLX-NEXT:    retq
2455;
2456; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2457; NoVLX:       # %bb.0: # %entry
2458; NoVLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
2459; NoVLX-NEXT:    kmovw %k0, %eax
2460; NoVLX-NEXT:    andl %edi, %eax
2461; NoVLX-NEXT:    vzeroupper
2462; NoVLX-NEXT:    retq
2463entry:
2464  %0 = bitcast <8 x i64> %__a to <16 x i32>
2465  %load = load <8 x i64>, ptr %__b
2466  %1 = bitcast <8 x i64> %load to <16 x i32>
2467  %2 = icmp eq <16 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it.
2468  %3 = bitcast i16 %__u to <16 x i1>
2469  %4 = and <16 x i1> %2, %3
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2470  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2471  %6 = bitcast <32 x i1> %5 to i32
2472  ret i32 %6
2473}
2474
2475
; Unmasked equality compare of the sixteen i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all lanes. The 16-bit result is
; widened to 32 bits with zero fill and returned as i32.
; Expected codegen: both runs fold the splat into a {1to16} broadcast memory
; operand on zmm; they differ only in the mask move (kmovd vs kmovw).
2476define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2477; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2478; VLX:       # %bb.0: # %entry
2479; VLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2480; VLX-NEXT:    kmovd %k0, %eax
2481; VLX-NEXT:    vzeroupper
2482; VLX-NEXT:    retq
2483;
2484; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2485; NoVLX:       # %bb.0: # %entry
2486; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2487; NoVLX-NEXT:    kmovw %k0, %eax
2488; NoVLX-NEXT:    vzeroupper
2489; NoVLX-NEXT:    retq
2490entry:
2491  %0 = bitcast <8 x i64> %__a to <16 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2492  %load = load i32, ptr %__b
2493  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2494  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2495  %2 = icmp eq <16 x i32> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2496  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2497  %4 = bitcast <32 x i1> %3 to i32
2498  ret i32 %4
2499}
2500
; Masked equality compare of the sixteen i32 lanes of %__a against a single
; i32 loaded from %__b and splatted to all lanes, ANDed with the bits of
; %__u. The 16-bit result is widened to 32 bits with zero fill and returned
; as i32.
; Expected codegen: both runs use a {1to16} broadcast memory operand on zmm;
; the full-feature run applies the mask through %k1, while the +avx512f-only
; run compares unmasked and applies the mask with a scalar andl on eax.
2501define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2502; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2503; VLX:       # %bb.0: # %entry
2504; VLX-NEXT:    kmovd %edi, %k1
2505; VLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2506; VLX-NEXT:    kmovd %k0, %eax
2507; VLX-NEXT:    vzeroupper
2508; VLX-NEXT:    retq
2509;
2510; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2511; NoVLX:       # %bb.0: # %entry
2512; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2513; NoVLX-NEXT:    kmovw %k0, %eax
2514; NoVLX-NEXT:    andl %edi, %eax
2515; NoVLX-NEXT:    vzeroupper
2516; NoVLX-NEXT:    retq
2517entry:
2518  %0 = bitcast <8 x i64> %__a to <16 x i32>
  ; Load one scalar and splat it: insert into lane 0, then shuffle with an
  ; all-zero index mask.
2519  %load = load i32, ptr %__b
2520  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2521  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2522  %2 = icmp eq <16 x i32> %0, %1
  ; Reinterpret the scalar mask as one bit per lane and apply it (mask is the
  ; first operand of the and in this variant).
2523  %3 = bitcast i16 %__u to <16 x i1>
2524  %4 = and <16 x i1> %3, %2
  ; Indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-31 are zero.
2525  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2526  %6 = bitcast <32 x i1> %5 to i32
2527  ret i32 %6
2528}
2529
2530
; Unmasked register-register equality compare of sixteen i32 lanes
; (%__a vs %__b). The 16-bit result is widened to 64 bits with zero fill and
; returned as i64.
; Expected codegen: both runs compare on zmm directly; the full-feature run
; reads the mask with kmovq into rax, the +avx512f-only run with kmovw into
; eax.
2531define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2532; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2533; VLX:       # %bb.0: # %entry
2534; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2535; VLX-NEXT:    kmovq %k0, %rax
2536; VLX-NEXT:    vzeroupper
2537; VLX-NEXT:    retq
2538;
2539; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2540; NoVLX:       # %bb.0: # %entry
2541; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2542; NoVLX-NEXT:    kmovw %k0, %eax
2543; NoVLX-NEXT:    vzeroupper
2544; NoVLX-NEXT:    retq
2545entry:
2546  %0 = bitcast <8 x i64> %__a to <16 x i32>
2547  %1 = bitcast <8 x i64> %__b to <16 x i32>
2548  %2 = icmp eq <16 x i32> %0, %1
  ; Every index from position 16 onward is in 16-31, i.e. a lane of the
  ; zeroinitializer operand, so result bits 16-63 are zero.
2549  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2550  %4 = bitcast <64 x i1> %3 to i64
2551  ret i64 %4
2552}
2553
; Unmasked equality compare of sixteen i32 lanes: %__a against a 512-bit
; vector loaded from %__b. The 16-bit result is widened to 64 bits with zero
; fill and returned as i64.
; Expected codegen: both runs fold the load into a zmm compare; the
; full-feature run reads the mask with kmovq into rax, the +avx512f-only run
; with kmovw into eax.
2554define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2555; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2556; VLX:       # %bb.0: # %entry
2557; VLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2558; VLX-NEXT:    kmovq %k0, %rax
2559; VLX-NEXT:    vzeroupper
2560; VLX-NEXT:    retq
2561;
2562; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2563; NoVLX:       # %bb.0: # %entry
2564; NoVLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2565; NoVLX-NEXT:    kmovw %k0, %eax
2566; NoVLX-NEXT:    vzeroupper
2567; NoVLX-NEXT:    retq
2568entry:
2569  %0 = bitcast <8 x i64> %__a to <16 x i32>
2570  %load = load <8 x i64>, ptr %__b
2571  %1 = bitcast <8 x i64> %load to <16 x i32>
2572  %2 = icmp eq <16 x i32> %0, %1
  ; Every index from position 16 onward is in 16-31, i.e. a lane of the
  ; zeroinitializer operand, so result bits 16-63 are zero.
2573  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2574  %4 = bitcast <64 x i1> %3 to i64
2575  ret i64 %4
2576}
2577
; Masked register/register form: the <16 x i32> equality result is ANDed with
; %__u (bitcast from i16 to <16 x i1>) before being zero-padded to <64 x i1>
; (shuffle indices 16-31 pick zeroinitializer lanes) and returned as an i64.
2578define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2579; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2580; VLX:       # %bb.0: # %entry
2581; VLX-NEXT:    kmovd %edi, %k1
2582; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2583; VLX-NEXT:    kmovq %k0, %rax
2584; VLX-NEXT:    vzeroupper
2585; VLX-NEXT:    retq
2586;
2587; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2588; NoVLX:       # %bb.0: # %entry
2589; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2590; NoVLX-NEXT:    kmovw %k0, %eax
2591; NoVLX-NEXT:    andl %edi, %eax
2592; NoVLX-NEXT:    vzeroupper
2593; NoVLX-NEXT:    retq
2594entry:
2595  %0 = bitcast <8 x i64> %__a to <16 x i32>
2596  %1 = bitcast <8 x i64> %__b to <16 x i32>
2597  %2 = icmp eq <16 x i32> %0, %1
2598  %3 = bitcast i16 %__u to <16 x i1>
2599  %4 = and <16 x i1> %2, %3
2600  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2601  %6 = bitcast <64 x i1> %5 to i64
2602  ret i64 %6
2603}
2604
; Masked memory-operand form: the right-hand side is loaded from %__b as
; <8 x i64>; the <16 x i32> equality result is ANDed with %__u (i16 bitcast to
; <16 x i1>), zero-padded to <64 x i1> by the shuffle and returned as an i64.
2605define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2606; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2607; VLX:       # %bb.0: # %entry
2608; VLX-NEXT:    kmovd %edi, %k1
2609; VLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2610; VLX-NEXT:    kmovq %k0, %rax
2611; VLX-NEXT:    vzeroupper
2612; VLX-NEXT:    retq
2613;
2614; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2615; NoVLX:       # %bb.0: # %entry
2616; NoVLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
2617; NoVLX-NEXT:    kmovw %k0, %eax
2618; NoVLX-NEXT:    andl %edi, %eax
2619; NoVLX-NEXT:    vzeroupper
2620; NoVLX-NEXT:    retq
2621entry:
2622  %0 = bitcast <8 x i64> %__a to <16 x i32>
2623  %load = load <8 x i64>, ptr %__b
2624  %1 = bitcast <8 x i64> %load to <16 x i32>
2625  %2 = icmp eq <16 x i32> %0, %1
2626  %3 = bitcast i16 %__u to <16 x i1>
2627  %4 = and <16 x i1> %2, %3
2628  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2629  %6 = bitcast <64 x i1> %5 to i64
2630  ret i64 %6
2631}
2632
2633
; Broadcast form: a single i32 loaded from %__b is splatted to all 16 lanes
; (insertelement into lane 0 followed by an all-zero shuffle mask) and compared
; for equality with %__a as <16 x i32>. The <16 x i1> result is zero-padded to
; <64 x i1> and returned as an i64.
2634define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2635; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2636; VLX:       # %bb.0: # %entry
2637; VLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2638; VLX-NEXT:    kmovq %k0, %rax
2639; VLX-NEXT:    vzeroupper
2640; VLX-NEXT:    retq
2641;
2642; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2643; NoVLX:       # %bb.0: # %entry
2644; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2645; NoVLX-NEXT:    kmovw %k0, %eax
2646; NoVLX-NEXT:    vzeroupper
2647; NoVLX-NEXT:    retq
2648entry:
2649  %0 = bitcast <8 x i64> %__a to <16 x i32>
2650  %load = load i32, ptr %__b
2651  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2652  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2653  %2 = icmp eq <16 x i32> %0, %1
2654  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2655  %4 = bitcast <64 x i1> %3 to i64
2656  ret i64 %4
2657}
2658
; Masked broadcast form: a single i32 loaded from %__b is splatted to all 16
; lanes and compared for equality with %__a as <16 x i32>. The result is ANDed
; with %__u (i16 bitcast to <16 x i1>; note the mask is the first 'and'
; operand here), zero-padded to <64 x i1> and returned as an i64.
2659define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2660; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2661; VLX:       # %bb.0: # %entry
2662; VLX-NEXT:    kmovd %edi, %k1
2663; VLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2664; VLX-NEXT:    kmovq %k0, %rax
2665; VLX-NEXT:    vzeroupper
2666; VLX-NEXT:    retq
2667;
2668; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2669; NoVLX:       # %bb.0: # %entry
2670; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2671; NoVLX-NEXT:    kmovw %k0, %eax
2672; NoVLX-NEXT:    andl %edi, %eax
2673; NoVLX-NEXT:    vzeroupper
2674; NoVLX-NEXT:    retq
2675entry:
2676  %0 = bitcast <8 x i64> %__a to <16 x i32>
2677  %load = load i32, ptr %__b
2678  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2679  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2680  %2 = icmp eq <16 x i32> %0, %1
2681  %3 = bitcast i16 %__u to <16 x i1>
2682  %4 = and <16 x i1> %3, %2
2683  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2684  %6 = bitcast <64 x i1> %5 to i64
2685  ret i64 %6
2686}
2687
2688
; Register/register form: compares %__a and %__b for equality as <2 x i64>
; (the bitcasts are same-type no-ops). The <2 x i1> result is widened to
; <4 x i1> by a shuffle whose indices 2 and 3 select zeroinitializer lanes,
; then bitcast to the returned i4.
2689define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2690; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2691; VLX:       # %bb.0: # %entry
2692; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
2693; VLX-NEXT:    kmovb %k0, %eax
2694; VLX-NEXT:    retq
2695;
2696; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2697; NoVLX:       # %bb.0: # %entry
2698; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2699; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2700; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2701; NoVLX-NEXT:    kmovw %k0, %eax
2702; NoVLX-NEXT:    andl $3, %eax
2703; NoVLX-NEXT:    vzeroupper
2704; NoVLX-NEXT:    retq
2705entry:
2706  %0 = bitcast <2 x i64> %__a to <2 x i64>
2707  %1 = bitcast <2 x i64> %__b to <2 x i64>
2708  %2 = icmp eq <2 x i64> %0, %1
2709  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2710  %4 = bitcast <4 x i1> %3 to i4
2711  ret i4 %4
2712}
2713
; Memory-operand form: the right-hand side is loaded from %__b as <2 x i64>
; and compared for equality with %__a. The <2 x i1> result is zero-padded to
; <4 x i1> (shuffle indices 2 and 3 pick zeroinitializer lanes) and returned
; as an i4.
2714define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2715; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2716; VLX:       # %bb.0: # %entry
2717; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
2718; VLX-NEXT:    kmovb %k0, %eax
2719; VLX-NEXT:    retq
2720;
2721; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2722; NoVLX:       # %bb.0: # %entry
2723; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2724; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
2725; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2726; NoVLX-NEXT:    kmovw %k0, %eax
2727; NoVLX-NEXT:    andl $3, %eax
2728; NoVLX-NEXT:    vzeroupper
2729; NoVLX-NEXT:    retq
2730entry:
2731  %0 = bitcast <2 x i64> %__a to <2 x i64>
2732  %load = load <2 x i64>, ptr %__b
2733  %1 = bitcast <2 x i64> %load to <2 x i64>
2734  %2 = icmp eq <2 x i64> %0, %1
2735  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2736  %4 = bitcast <4 x i1> %3 to i4
2737  ret i4 %4
2738}
2739
; Masked register/register form: the <2 x i64> equality result is ANDed with
; the two lowest bits of %__u (lanes 0 and 1 extracted from its <8 x i1>
; bitcast), then zero-padded to <4 x i1> and returned as an i4.
2740define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2741; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2742; VLX:       # %bb.0: # %entry
2743; VLX-NEXT:    kmovd %edi, %k1
2744; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2745; VLX-NEXT:    kmovb %k0, %eax
2746; VLX-NEXT:    retq
2747;
2748; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2749; NoVLX:       # %bb.0: # %entry
2750; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2751; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2752; NoVLX-NEXT:    kmovw %edi, %k1
2753; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2754; NoVLX-NEXT:    kmovw %k0, %eax
2755; NoVLX-NEXT:    andl $3, %eax
2756; NoVLX-NEXT:    vzeroupper
2757; NoVLX-NEXT:    retq
2758entry:
2759  %0 = bitcast <2 x i64> %__a to <2 x i64>
2760  %1 = bitcast <2 x i64> %__b to <2 x i64>
2761  %2 = icmp eq <2 x i64> %0, %1
2762  %3 = bitcast i8 %__u to <8 x i1>
2763  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2764  %4 = and <2 x i1> %2, %extract.i
2765  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2766  %6 = bitcast <4 x i1> %5 to i4
2767  ret i4 %6
2768}
2769
; Masked memory-operand form: the right-hand side is loaded from %__b as
; <2 x i64>; the equality result is ANDed with the two lowest bits of %__u
; (lanes 0 and 1 of its <8 x i1> bitcast), zero-padded to <4 x i1> and
; returned as an i4.
2770define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2771; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2772; VLX:       # %bb.0: # %entry
2773; VLX-NEXT:    kmovd %edi, %k1
2774; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2775; VLX-NEXT:    kmovb %k0, %eax
2776; VLX-NEXT:    retq
2777;
2778; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2779; NoVLX:       # %bb.0: # %entry
2780; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2781; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
2782; NoVLX-NEXT:    kmovw %edi, %k1
2783; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2784; NoVLX-NEXT:    kmovw %k0, %eax
2785; NoVLX-NEXT:    andl $3, %eax
2786; NoVLX-NEXT:    vzeroupper
2787; NoVLX-NEXT:    retq
2788entry:
2789  %0 = bitcast <2 x i64> %__a to <2 x i64>
2790  %load = load <2 x i64>, ptr %__b
2791  %1 = bitcast <2 x i64> %load to <2 x i64>
2792  %2 = icmp eq <2 x i64> %0, %1
2793  %3 = bitcast i8 %__u to <8 x i1>
2794  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2795  %4 = and <2 x i1> %2, %extract.i
2796  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2797  %6 = bitcast <4 x i1> %5 to i4
2798  ret i4 %6
2799}
2800
2801
; Broadcast form: a single i64 loaded from %__b is splatted to both lanes
; (insertelement into lane 0 followed by a <0, 0> shuffle mask) and compared
; for equality with %__a. The <2 x i1> result is zero-padded to <4 x i1> and
; returned as an i4.
2802define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2803; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2804; VLX:       # %bb.0: # %entry
2805; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2806; VLX-NEXT:    kmovb %k0, %eax
2807; VLX-NEXT:    retq
2808;
2809; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2810; NoVLX:       # %bb.0: # %entry
2811; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2812; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2813; NoVLX-NEXT:    kmovw %k0, %eax
2814; NoVLX-NEXT:    andl $3, %eax
2815; NoVLX-NEXT:    vzeroupper
2816; NoVLX-NEXT:    retq
2817entry:
2818  %0 = bitcast <2 x i64> %__a to <2 x i64>
2819  %load = load i64, ptr %__b
2820  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2821  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2822  %2 = icmp eq <2 x i64> %0, %1
2823  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2824  %4 = bitcast <4 x i1> %3 to i4
2825  ret i4 %4
2826}
2827
; Masked broadcast form: a single i64 loaded from %__b is splatted to both
; lanes and compared for equality with %__a. The result is ANDed with the two
; lowest bits of %__u (mask is the first 'and' operand here), zero-padded to
; <4 x i1> and returned as an i4.
2828define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2829; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2830; VLX:       # %bb.0: # %entry
2831; VLX-NEXT:    kmovd %edi, %k1
2832; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2833; VLX-NEXT:    kmovb %k0, %eax
2834; VLX-NEXT:    retq
2835;
2836; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2837; NoVLX:       # %bb.0: # %entry
2838; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2839; NoVLX-NEXT:    kmovw %edi, %k1
2840; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
2841; NoVLX-NEXT:    kmovw %k0, %eax
2842; NoVLX-NEXT:    andl $3, %eax
2843; NoVLX-NEXT:    vzeroupper
2844; NoVLX-NEXT:    retq
2845entry:
2846  %0 = bitcast <2 x i64> %__a to <2 x i64>
2847  %load = load i64, ptr %__b
2848  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2849  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2850  %2 = icmp eq <2 x i64> %0, %1
2851  %3 = bitcast i8 %__u to <8 x i1>
2852  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2853  %4 = and <2 x i1> %extract.i, %2
2854  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2855  %6 = bitcast <4 x i1> %5 to i4
2856  ret i4 %6
2857}
2858
2859
; Register/register form: compares %__a and %__b for equality as <2 x i64>.
; The <2 x i1> result is widened to <8 x i1> by a shuffle whose indices 2 and
; 3 (repeated for lanes 2-7) select zeroinitializer lanes, then bitcast to i8.
2860define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2861; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2862; VLX:       # %bb.0: # %entry
2863; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
2864; VLX-NEXT:    kmovd %k0, %eax
2865; VLX-NEXT:    # kill: def $al killed $al killed $eax
2866; VLX-NEXT:    retq
2867;
2868; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2869; NoVLX:       # %bb.0: # %entry
2870; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2871; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2872; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2873; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2874; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2875; NoVLX-NEXT:    kmovw %k0, %eax
2876; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2877; NoVLX-NEXT:    vzeroupper
2878; NoVLX-NEXT:    retq
2879entry:
2880  %0 = bitcast <2 x i64> %__a to <2 x i64>
2881  %1 = bitcast <2 x i64> %__b to <2 x i64>
2882  %2 = icmp eq <2 x i64> %0, %1
2883  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2884  %4 = bitcast <8 x i1> %3 to i8
2885  ret i8 %4
2886}
2887
; Memory-operand form: the right-hand side is loaded from %__b as <2 x i64>
; and compared for equality with %__a. The <2 x i1> result is zero-padded to
; <8 x i1> (shuffle indices 2 and 3 pick zeroinitializer lanes) and returned
; as an i8.
2888define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2889; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2890; VLX:       # %bb.0: # %entry
2891; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
2892; VLX-NEXT:    kmovd %k0, %eax
2893; VLX-NEXT:    # kill: def $al killed $al killed $eax
2894; VLX-NEXT:    retq
2895;
2896; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2897; NoVLX:       # %bb.0: # %entry
2898; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2899; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
2900; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2901; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2902; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2903; NoVLX-NEXT:    kmovw %k0, %eax
2904; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2905; NoVLX-NEXT:    vzeroupper
2906; NoVLX-NEXT:    retq
2907entry:
2908  %0 = bitcast <2 x i64> %__a to <2 x i64>
2909  %load = load <2 x i64>, ptr %__b
2910  %1 = bitcast <2 x i64> %load to <2 x i64>
2911  %2 = icmp eq <2 x i64> %0, %1
2912  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2913  %4 = bitcast <8 x i1> %3 to i8
2914  ret i8 %4
2915}
2916
; Masked register/register form: the <2 x i64> equality result is ANDed with
; the two lowest bits of %__u (lanes 0 and 1 extracted from its <8 x i1>
; bitcast), then zero-padded to <8 x i1> and returned as an i8.
2917define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2918; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2919; VLX:       # %bb.0: # %entry
2920; VLX-NEXT:    kmovd %edi, %k1
2921; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2922; VLX-NEXT:    kmovd %k0, %eax
2923; VLX-NEXT:    # kill: def $al killed $al killed $eax
2924; VLX-NEXT:    retq
2925;
2926; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2927; NoVLX:       # %bb.0: # %entry
2928; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2929; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2930; NoVLX-NEXT:    kmovw %edi, %k1
2931; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2932; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2933; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2934; NoVLX-NEXT:    kmovw %k0, %eax
2935; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2936; NoVLX-NEXT:    vzeroupper
2937; NoVLX-NEXT:    retq
2938entry:
2939  %0 = bitcast <2 x i64> %__a to <2 x i64>
2940  %1 = bitcast <2 x i64> %__b to <2 x i64>
2941  %2 = icmp eq <2 x i64> %0, %1
2942  %3 = bitcast i8 %__u to <8 x i1>
2943  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2944  %4 = and <2 x i1> %2, %extract.i
2945  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2946  %6 = bitcast <8 x i1> %5 to i8
2947  ret i8 %6
2948}
2949
; Masked memory-operand form: the right-hand side is loaded from %__b as
; <2 x i64>; the equality result is ANDed with the two lowest bits of %__u
; (lanes 0 and 1 of its <8 x i1> bitcast), zero-padded to <8 x i1> and
; returned as an i8.
2950define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2951; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2952; VLX:       # %bb.0: # %entry
2953; VLX-NEXT:    kmovd %edi, %k1
2954; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2955; VLX-NEXT:    kmovd %k0, %eax
2956; VLX-NEXT:    # kill: def $al killed $al killed $eax
2957; VLX-NEXT:    retq
2958;
2959; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2960; NoVLX:       # %bb.0: # %entry
2961; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2962; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
2963; NoVLX-NEXT:    kmovw %edi, %k1
2964; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2965; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2966; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2967; NoVLX-NEXT:    kmovw %k0, %eax
2968; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2969; NoVLX-NEXT:    vzeroupper
2970; NoVLX-NEXT:    retq
2971entry:
2972  %0 = bitcast <2 x i64> %__a to <2 x i64>
2973  %load = load <2 x i64>, ptr %__b
2974  %1 = bitcast <2 x i64> %load to <2 x i64>
2975  %2 = icmp eq <2 x i64> %0, %1
2976  %3 = bitcast i8 %__u to <8 x i1>
2977  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2978  %4 = and <2 x i1> %2, %extract.i
2979  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2980  %6 = bitcast <8 x i1> %5 to i8
2981  ret i8 %6
2982}
2983
2984
; Broadcast form: a single i64 loaded from %__b is splatted to both lanes
; (insertelement into lane 0 followed by a <0, 0> shuffle mask) and compared
; for equality with %__a. The <2 x i1> result is zero-padded to <8 x i1> and
; returned as an i8.
2985define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2986; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2987; VLX:       # %bb.0: # %entry
2988; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2989; VLX-NEXT:    kmovd %k0, %eax
2990; VLX-NEXT:    # kill: def $al killed $al killed $eax
2991; VLX-NEXT:    retq
2992;
2993; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2994; NoVLX:       # %bb.0: # %entry
2995; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2996; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2997; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2998; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2999; NoVLX-NEXT:    kmovw %k0, %eax
3000; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3001; NoVLX-NEXT:    vzeroupper
3002; NoVLX-NEXT:    retq
3003entry:
3004  %0 = bitcast <2 x i64> %__a to <2 x i64>
3005  %load = load i64, ptr %__b
3006  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3007  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3008  %2 = icmp eq <2 x i64> %0, %1
3009  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3010  %4 = bitcast <8 x i1> %3 to i8
3011  ret i8 %4
3012}
3013
; Masked broadcast form: a single i64 loaded from %__b is splatted to both
; lanes and compared for equality with %__a. The result is ANDed with the two
; lowest bits of %__u (mask is the first 'and' operand here), zero-padded to
; <8 x i1> and returned as an i8.
3014define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3015; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3016; VLX:       # %bb.0: # %entry
3017; VLX-NEXT:    kmovd %edi, %k1
3018; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3019; VLX-NEXT:    kmovd %k0, %eax
3020; VLX-NEXT:    # kill: def $al killed $al killed $eax
3021; VLX-NEXT:    retq
3022;
3023; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3024; NoVLX:       # %bb.0: # %entry
3025; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3026; NoVLX-NEXT:    kmovw %edi, %k1
3027; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3028; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3029; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3030; NoVLX-NEXT:    kmovw %k0, %eax
3031; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3032; NoVLX-NEXT:    vzeroupper
3033; NoVLX-NEXT:    retq
3034entry:
3035  %0 = bitcast <2 x i64> %__a to <2 x i64>
3036  %load = load i64, ptr %__b
3037  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3038  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3039  %2 = icmp eq <2 x i64> %0, %1
3040  %3 = bitcast i8 %__u to <8 x i1>
3041  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3042  %4 = and <2 x i1> %extract.i, %2
3043  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3044  %6 = bitcast <8 x i1> %5 to i8
3045  ret i8 %6
3046}
3047
3048
; Register/register form: compares %__a and %__b for equality as <2 x i64>.
; The <2 x i1> result is widened to <16 x i1> by a shuffle whose indices 2 and
; 3 (repeated for lanes 2-15) select zeroinitializer lanes, then bitcast to
; the returned i16.
3049define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3050; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3051; VLX:       # %bb.0: # %entry
3052; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3053; VLX-NEXT:    kmovd %k0, %eax
3054; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3055; VLX-NEXT:    retq
3056;
3057; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3058; NoVLX:       # %bb.0: # %entry
3059; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3060; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3061; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3062; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3063; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3064; NoVLX-NEXT:    kmovw %k0, %eax
3065; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3066; NoVLX-NEXT:    vzeroupper
3067; NoVLX-NEXT:    retq
3068entry:
3069  %0 = bitcast <2 x i64> %__a to <2 x i64>
3070  %1 = bitcast <2 x i64> %__b to <2 x i64>
3071  %2 = icmp eq <2 x i64> %0, %1
3072  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3073  %4 = bitcast <16 x i1> %3 to i16
3074  ret i16 %4
3075}
3076
; Memory-operand form: the right-hand side is loaded from %__b as <2 x i64>
; and compared for equality with %__a. The <2 x i1> result is zero-padded to
; <16 x i1> (shuffle indices 2 and 3 pick zeroinitializer lanes) and returned
; as an i16.
3077define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3078; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3079; VLX:       # %bb.0: # %entry
3080; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3081; VLX-NEXT:    kmovd %k0, %eax
3082; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3083; VLX-NEXT:    retq
3084;
3085; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3086; NoVLX:       # %bb.0: # %entry
3087; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3088; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3089; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3090; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3091; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3092; NoVLX-NEXT:    kmovw %k0, %eax
3093; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3094; NoVLX-NEXT:    vzeroupper
3095; NoVLX-NEXT:    retq
3096entry:
3097  %0 = bitcast <2 x i64> %__a to <2 x i64>
3098  %load = load <2 x i64>, ptr %__b
3099  %1 = bitcast <2 x i64> %load to <2 x i64>
3100  %2 = icmp eq <2 x i64> %0, %1
3101  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3102  %4 = bitcast <16 x i1> %3 to i16
3103  ret i16 %4
3104}
3105
; Masked register/register form: the <2 x i64> equality result is ANDed with
; the two lowest bits of %__u (lanes 0 and 1 extracted from its <8 x i1>
; bitcast), then zero-padded to <16 x i1> and returned as an i16.
3106define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3107; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3108; VLX:       # %bb.0: # %entry
3109; VLX-NEXT:    kmovd %edi, %k1
3110; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3111; VLX-NEXT:    kmovd %k0, %eax
3112; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3113; VLX-NEXT:    retq
3114;
3115; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3116; NoVLX:       # %bb.0: # %entry
3117; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3118; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3119; NoVLX-NEXT:    kmovw %edi, %k1
3120; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3121; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3122; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3123; NoVLX-NEXT:    kmovw %k0, %eax
3124; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3125; NoVLX-NEXT:    vzeroupper
3126; NoVLX-NEXT:    retq
3127entry:
3128  %0 = bitcast <2 x i64> %__a to <2 x i64>
3129  %1 = bitcast <2 x i64> %__b to <2 x i64>
3130  %2 = icmp eq <2 x i64> %0, %1
3131  %3 = bitcast i8 %__u to <8 x i1>
3132  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3133  %4 = and <2 x i1> %2, %extract.i
3134  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3135  %6 = bitcast <16 x i1> %5 to i16
3136  ret i16 %6
3137}
3138
; Masked memory-operand form: the right-hand side is loaded from %__b as
; <2 x i64>; the equality result is ANDed with the two lowest bits of %__u
; (lanes 0 and 1 of its <8 x i1> bitcast), zero-padded to <16 x i1> and
; returned as an i16.
3139define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3140; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3141; VLX:       # %bb.0: # %entry
3142; VLX-NEXT:    kmovd %edi, %k1
3143; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3144; VLX-NEXT:    kmovd %k0, %eax
3145; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3146; VLX-NEXT:    retq
3147;
3148; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3149; NoVLX:       # %bb.0: # %entry
3150; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3151; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3152; NoVLX-NEXT:    kmovw %edi, %k1
3153; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3154; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3155; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3156; NoVLX-NEXT:    kmovw %k0, %eax
3157; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3158; NoVLX-NEXT:    vzeroupper
3159; NoVLX-NEXT:    retq
3160entry:
3161  %0 = bitcast <2 x i64> %__a to <2 x i64>
3162  %load = load <2 x i64>, ptr %__b
3163  %1 = bitcast <2 x i64> %load to <2 x i64>
3164  %2 = icmp eq <2 x i64> %0, %1
3165  %3 = bitcast i8 %__u to <8 x i1>
3166  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3167  %4 = and <2 x i1> %2, %extract.i
3168  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3169  %6 = bitcast <16 x i1> %5 to i16
3170  ret i16 %6
3171}
3172
3173
; Broadcast form: a single i64 loaded from %__b is splatted to both lanes
; (insertelement into lane 0 followed by a <0, 0> shuffle mask) and compared
; for equality with %__a. The <2 x i1> result is zero-padded to <16 x i1> and
; returned as an i16.
3174define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3175; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3176; VLX:       # %bb.0: # %entry
3177; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3178; VLX-NEXT:    kmovd %k0, %eax
3179; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3180; VLX-NEXT:    retq
3181;
3182; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3183; NoVLX:       # %bb.0: # %entry
3184; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3185; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3186; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3187; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3188; NoVLX-NEXT:    kmovw %k0, %eax
3189; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3190; NoVLX-NEXT:    vzeroupper
3191; NoVLX-NEXT:    retq
3192entry:
3193  %0 = bitcast <2 x i64> %__a to <2 x i64>
3194  %load = load i64, ptr %__b
3195  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3196  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3197  %2 = icmp eq <2 x i64> %0, %1
3198  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3199  %4 = bitcast <16 x i1> %3 to i16
3200  ret i16 %4
3201}
3202
; Masked broadcast form: a single i64 loaded from %__b is splatted to both
; lanes and compared for equality with %__a. The result is ANDed with the two
; lowest bits of %__u (mask is the first 'and' operand here), zero-padded to
; <16 x i1> and returned as an i16.
3203define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3204; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3205; VLX:       # %bb.0: # %entry
3206; VLX-NEXT:    kmovd %edi, %k1
3207; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3208; VLX-NEXT:    kmovd %k0, %eax
3209; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3210; VLX-NEXT:    retq
3211;
3212; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3213; NoVLX:       # %bb.0: # %entry
3214; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3215; NoVLX-NEXT:    kmovw %edi, %k1
3216; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3217; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3218; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3219; NoVLX-NEXT:    kmovw %k0, %eax
3220; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3221; NoVLX-NEXT:    vzeroupper
3222; NoVLX-NEXT:    retq
3223entry:
3224  %0 = bitcast <2 x i64> %__a to <2 x i64>
3225  %load = load i64, ptr %__b
3226  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3227  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3228  %2 = icmp eq <2 x i64> %0, %1
3229  %3 = bitcast i8 %__u to <8 x i1>
3230  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3231  %4 = and <2 x i1> %extract.i, %2
3232  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3233  %6 = bitcast <16 x i1> %5 to i16
3234  ret i16 %6
3235}
3236
3237
; Register/register form: compares %__a and %__b for equality as <2 x i64>.
; The <2 x i1> result is widened to <32 x i1> by a shuffle whose indices 2 and
; 3 (repeated for lanes 2-31) select zeroinitializer lanes, then bitcast to
; the returned i32.
3238define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3239; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3240; VLX:       # %bb.0: # %entry
3241; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3242; VLX-NEXT:    kmovd %k0, %eax
3243; VLX-NEXT:    retq
3244;
3245; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3246; NoVLX:       # %bb.0: # %entry
3247; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3248; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3249; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3250; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3251; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3252; NoVLX-NEXT:    kmovw %k0, %eax
3253; NoVLX-NEXT:    vzeroupper
3254; NoVLX-NEXT:    retq
3255entry:
3256  %0 = bitcast <2 x i64> %__a to <2 x i64>
3257  %1 = bitcast <2 x i64> %__b to <2 x i64>
3258  %2 = icmp eq <2 x i64> %0, %1
3259  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3260  %4 = bitcast <32 x i1> %3 to i32
3261  ret i32 %4
3262}
3263
; Equality compare of a <2 x i64> register operand against a full <2 x i64>
; vector loaded from %__b. The <2 x i1> result is widened to <32 x i1> with
; zero padding and returned as an i32 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3264define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3265; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3266; VLX:       # %bb.0: # %entry
3267; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3268; VLX-NEXT:    kmovd %k0, %eax
3269; VLX-NEXT:    retq
3270;
3271; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3272; NoVLX:       # %bb.0: # %entry
3273; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3274; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3275; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3276; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3277; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3278; NoVLX-NEXT:    kmovw %k0, %eax
3279; NoVLX-NEXT:    vzeroupper
3280; NoVLX-NEXT:    retq
3281entry:
3282  %0 = bitcast <2 x i64> %__a to <2 x i64>
3283  %load = load <2 x i64>, ptr %__b
3284  %1 = bitcast <2 x i64> %load to <2 x i64>
3285  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 0-1 select the compare bits from %2; indices 2-3 select lanes
  ; of the zeroinitializer operand, so result lanes 2..31 are all zero.
3286  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3287  %4 = bitcast <32 x i1> %3 to i32
3288  ret i32 %4
3289}
3290
; Masked equality compare of two <2 x i64> register operands. The <2 x i1>
; compare result is ANDed with elements 0-1 of %__u (viewed as <8 x i1>), then
; widened to <32 x i1> with zero padding and returned as an i32 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3291define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3292; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3293; VLX:       # %bb.0: # %entry
3294; VLX-NEXT:    kmovd %edi, %k1
3295; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3296; VLX-NEXT:    kmovd %k0, %eax
3297; VLX-NEXT:    retq
3298;
3299; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3300; NoVLX:       # %bb.0: # %entry
3301; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3302; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3303; NoVLX-NEXT:    kmovw %edi, %k1
3304; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3305; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3306; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3307; NoVLX-NEXT:    kmovw %k0, %eax
3308; NoVLX-NEXT:    vzeroupper
3309; NoVLX-NEXT:    retq
3310entry:
3311  %0 = bitcast <2 x i64> %__a to <2 x i64>
3312  %1 = bitcast <2 x i64> %__b to <2 x i64>
3313  %2 = icmp eq <2 x i64> %0, %1
3314  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0 and 1 of the 8-element mask, matching the 2 lanes.
3315  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3316  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 0-1 select the masked compare bits from %4; indices 2-3 select
  ; lanes of the zeroinitializer operand, so result lanes 2..31 are all zero.
3317  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3318  %6 = bitcast <32 x i1> %5 to i32
3319  ret i32 %6
3320}
3321
; Masked equality compare of a <2 x i64> register operand against a full
; <2 x i64> vector loaded from %__b. The <2 x i1> compare result is ANDed with
; elements 0-1 of %__u (viewed as <8 x i1>), then widened to <32 x i1> with
; zero padding and returned as an i32 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3322define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3323; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3324; VLX:       # %bb.0: # %entry
3325; VLX-NEXT:    kmovd %edi, %k1
3326; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3327; VLX-NEXT:    kmovd %k0, %eax
3328; VLX-NEXT:    retq
3329;
3330; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3331; NoVLX:       # %bb.0: # %entry
3332; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3333; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3334; NoVLX-NEXT:    kmovw %edi, %k1
3335; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3336; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3337; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3338; NoVLX-NEXT:    kmovw %k0, %eax
3339; NoVLX-NEXT:    vzeroupper
3340; NoVLX-NEXT:    retq
3341entry:
3342  %0 = bitcast <2 x i64> %__a to <2 x i64>
3343  %load = load <2 x i64>, ptr %__b
3344  %1 = bitcast <2 x i64> %load to <2 x i64>
3345  %2 = icmp eq <2 x i64> %0, %1
3346  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0 and 1 of the 8-element mask, matching the 2 lanes.
3347  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3348  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 0-1 select the masked compare bits from %4; indices 2-3 select
  ; lanes of the zeroinitializer operand, so result lanes 2..31 are all zero.
3349  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3350  %6 = bitcast <32 x i1> %5 to i32
3351  ret i32 %6
3352}
3353
3354
; Equality compare of a <2 x i64> register operand against a single i64 loaded
; from %__b and splatted to both lanes (the broadcast-from-memory form). The
; <2 x i1> result is widened to <32 x i1> with zero padding and returned as an
; i32 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3355define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3356; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3357; VLX:       # %bb.0: # %entry
3358; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3359; VLX-NEXT:    kmovd %k0, %eax
3360; VLX-NEXT:    retq
3361;
3362; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3363; NoVLX:       # %bb.0: # %entry
3364; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3365; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3366; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3367; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3368; NoVLX-NEXT:    kmovw %k0, %eax
3369; NoVLX-NEXT:    vzeroupper
3370; NoVLX-NEXT:    retq
3371entry:
3372  %0 = bitcast <2 x i64> %__a to <2 x i64>
3373  %load = load i64, ptr %__b
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every
  ; lane.
3374  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3375  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3376  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 0-1 select the compare bits from %2; indices 2-3 select lanes
  ; of the zeroinitializer operand, so result lanes 2..31 are all zero.
3377  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3378  %4 = bitcast <32 x i1> %3 to i32
3379  ret i32 %4
3380}
3381
; Masked equality compare of a <2 x i64> register operand against a single i64
; loaded from %__b and splatted to both lanes (the broadcast-from-memory form).
; The <2 x i1> compare result is ANDed with elements 0-1 of %__u (viewed as
; <8 x i1>), then widened to <32 x i1> with zero padding and returned as an
; i32 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3382define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3383; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3384; VLX:       # %bb.0: # %entry
3385; VLX-NEXT:    kmovd %edi, %k1
3386; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3387; VLX-NEXT:    kmovd %k0, %eax
3388; VLX-NEXT:    retq
3389;
3390; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3391; NoVLX:       # %bb.0: # %entry
3392; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3393; NoVLX-NEXT:    kmovw %edi, %k1
3394; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3395; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3396; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3397; NoVLX-NEXT:    kmovw %k0, %eax
3398; NoVLX-NEXT:    vzeroupper
3399; NoVLX-NEXT:    retq
3400entry:
3401  %0 = bitcast <2 x i64> %__a to <2 x i64>
3402  %load = load i64, ptr %__b
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every
  ; lane.
3403  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3404  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3405  %2 = icmp eq <2 x i64> %0, %1
3406  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0 and 1 of the 8-element mask, matching the 2 lanes.
3407  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3408  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 0-1 select the masked compare bits from %4; indices 2-3 select
  ; lanes of the zeroinitializer operand, so result lanes 2..31 are all zero.
3409  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3410  %6 = bitcast <32 x i1> %5 to i32
3411  ret i32 %6
3412}
3413
3414
3415define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3416; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3417; VLX:       # %bb.0: # %entry
3418; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3419; VLX-NEXT:    kmovq %k0, %rax
3420; VLX-NEXT:    retq
3421;
3422; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3423; NoVLX:       # %bb.0: # %entry
3424; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3425; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3426; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3427; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3428; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3429; NoVLX-NEXT:    kmovw %k0, %eax
3430; NoVLX-NEXT:    vzeroupper
3431; NoVLX-NEXT:    retq
3432entry:
3433  %0 = bitcast <2 x i64> %__a to <2 x i64>
3434  %1 = bitcast <2 x i64> %__b to <2 x i64>
3435  %2 = icmp eq <2 x i64> %0, %1
3436  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3437  %4 = bitcast <64 x i1> %3 to i64
3438  ret i64 %4
3439}
3440
3441define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3442; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3443; VLX:       # %bb.0: # %entry
3444; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3445; VLX-NEXT:    kmovq %k0, %rax
3446; VLX-NEXT:    retq
3447;
3448; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3449; NoVLX:       # %bb.0: # %entry
3450; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3451; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3452; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3453; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3454; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3455; NoVLX-NEXT:    kmovw %k0, %eax
3456; NoVLX-NEXT:    vzeroupper
3457; NoVLX-NEXT:    retq
3458entry:
3459  %0 = bitcast <2 x i64> %__a to <2 x i64>
3460  %load = load <2 x i64>, ptr %__b
3461  %1 = bitcast <2 x i64> %load to <2 x i64>
3462  %2 = icmp eq <2 x i64> %0, %1
3463  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3464  %4 = bitcast <64 x i1> %3 to i64
3465  ret i64 %4
3466}
3467
3468define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3469; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3470; VLX:       # %bb.0: # %entry
3471; VLX-NEXT:    kmovd %edi, %k1
3472; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3473; VLX-NEXT:    kmovq %k0, %rax
3474; VLX-NEXT:    retq
3475;
3476; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3477; NoVLX:       # %bb.0: # %entry
3478; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3479; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3480; NoVLX-NEXT:    kmovw %edi, %k1
3481; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3482; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3483; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3484; NoVLX-NEXT:    kmovw %k0, %eax
3485; NoVLX-NEXT:    vzeroupper
3486; NoVLX-NEXT:    retq
3487entry:
3488  %0 = bitcast <2 x i64> %__a to <2 x i64>
3489  %1 = bitcast <2 x i64> %__b to <2 x i64>
3490  %2 = icmp eq <2 x i64> %0, %1
3491  %3 = bitcast i8 %__u to <8 x i1>
3492  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3493  %4 = and <2 x i1> %2, %extract.i
3494  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3495  %6 = bitcast <64 x i1> %5 to i64
3496  ret i64 %6
3497}
3498
3499define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3500; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3501; VLX:       # %bb.0: # %entry
3502; VLX-NEXT:    kmovd %edi, %k1
3503; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3504; VLX-NEXT:    kmovq %k0, %rax
3505; VLX-NEXT:    retq
3506;
3507; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3508; NoVLX:       # %bb.0: # %entry
3509; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3510; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3511; NoVLX-NEXT:    kmovw %edi, %k1
3512; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3513; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3514; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3515; NoVLX-NEXT:    kmovw %k0, %eax
3516; NoVLX-NEXT:    vzeroupper
3517; NoVLX-NEXT:    retq
3518entry:
3519  %0 = bitcast <2 x i64> %__a to <2 x i64>
3520  %load = load <2 x i64>, ptr %__b
3521  %1 = bitcast <2 x i64> %load to <2 x i64>
3522  %2 = icmp eq <2 x i64> %0, %1
3523  %3 = bitcast i8 %__u to <8 x i1>
3524  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3525  %4 = and <2 x i1> %2, %extract.i
3526  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3527  %6 = bitcast <64 x i1> %5 to i64
3528  ret i64 %6
3529}
3530
3531
3532define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3533; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3534; VLX:       # %bb.0: # %entry
3535; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3536; VLX-NEXT:    kmovq %k0, %rax
3537; VLX-NEXT:    retq
3538;
3539; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3540; NoVLX:       # %bb.0: # %entry
3541; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3542; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3543; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3544; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3545; NoVLX-NEXT:    kmovw %k0, %eax
3546; NoVLX-NEXT:    vzeroupper
3547; NoVLX-NEXT:    retq
3548entry:
3549  %0 = bitcast <2 x i64> %__a to <2 x i64>
3550  %load = load i64, ptr %__b
3551  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3552  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3553  %2 = icmp eq <2 x i64> %0, %1
3554  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3555  %4 = bitcast <64 x i1> %3 to i64
3556  ret i64 %4
3557}
3558
3559define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3560; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3561; VLX:       # %bb.0: # %entry
3562; VLX-NEXT:    kmovd %edi, %k1
3563; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3564; VLX-NEXT:    kmovq %k0, %rax
3565; VLX-NEXT:    retq
3566;
3567; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3568; NoVLX:       # %bb.0: # %entry
3569; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3570; NoVLX-NEXT:    kmovw %edi, %k1
3571; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3572; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3573; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3574; NoVLX-NEXT:    kmovw %k0, %eax
3575; NoVLX-NEXT:    vzeroupper
3576; NoVLX-NEXT:    retq
3577entry:
3578  %0 = bitcast <2 x i64> %__a to <2 x i64>
3579  %load = load i64, ptr %__b
3580  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3581  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3582  %2 = icmp eq <2 x i64> %0, %1
3583  %3 = bitcast i8 %__u to <8 x i1>
3584  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3585  %4 = and <2 x i1> %extract.i, %2
3586  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3587  %6 = bitcast <64 x i1> %5 to i64
3588  ret i64 %6
3589}
3590
3591
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to <8 x i1> with zero padding and returned as an i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3592define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3593; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3594; VLX:       # %bb.0: # %entry
3595; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
3596; VLX-NEXT:    kmovd %k0, %eax
3597; VLX-NEXT:    # kill: def $al killed $al killed $eax
3598; VLX-NEXT:    vzeroupper
3599; VLX-NEXT:    retq
3600;
3601; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3602; NoVLX:       # %bb.0: # %entry
3603; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3604; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3605; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3606; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3607; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3608; NoVLX-NEXT:    kmovw %k0, %eax
3609; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3610; NoVLX-NEXT:    vzeroupper
3611; NoVLX-NEXT:    retq
3612entry:
3613  %0 = bitcast <4 x i64> %__a to <4 x i64>
3614  %1 = bitcast <4 x i64> %__b to <4 x i64>
3615  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..7 are all zero.
3616  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3617  %4 = bitcast <8 x i1> %3 to i8
3618  ret i8 %4
3619}
3620
; Equality compare of a <4 x i64> register operand against a full <4 x i64>
; vector loaded from %__b. The <4 x i1> result is widened to <8 x i1> with
; zero padding and returned as an i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3621define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3622; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3623; VLX:       # %bb.0: # %entry
3624; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
3625; VLX-NEXT:    kmovd %k0, %eax
3626; VLX-NEXT:    # kill: def $al killed $al killed $eax
3627; VLX-NEXT:    vzeroupper
3628; VLX-NEXT:    retq
3629;
3630; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3631; NoVLX:       # %bb.0: # %entry
3632; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3633; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
3634; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3635; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3636; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3637; NoVLX-NEXT:    kmovw %k0, %eax
3638; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3639; NoVLX-NEXT:    vzeroupper
3640; NoVLX-NEXT:    retq
3641entry:
3642  %0 = bitcast <4 x i64> %__a to <4 x i64>
3643  %load = load <4 x i64>, ptr %__b
3644  %1 = bitcast <4 x i64> %load to <4 x i64>
3645  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..7 are all zero.
3646  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3647  %4 = bitcast <8 x i1> %3 to i8
3648  ret i8 %4
3649}
3650
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; compare result is ANDed with elements 0-3 of %__u (viewed as <8 x i1>), then
; widened to <8 x i1> with zero padding and returned as an i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3651define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3652; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3653; VLX:       # %bb.0: # %entry
3654; VLX-NEXT:    kmovd %edi, %k1
3655; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3656; VLX-NEXT:    kmovd %k0, %eax
3657; VLX-NEXT:    # kill: def $al killed $al killed $eax
3658; VLX-NEXT:    vzeroupper
3659; VLX-NEXT:    retq
3660;
3661; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3662; NoVLX:       # %bb.0: # %entry
3663; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3664; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3665; NoVLX-NEXT:    kmovw %edi, %k1
3666; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3667; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3668; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3669; NoVLX-NEXT:    kmovw %k0, %eax
3670; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3671; NoVLX-NEXT:    vzeroupper
3672; NoVLX-NEXT:    retq
3673entry:
3674  %0 = bitcast <4 x i64> %__a to <4 x i64>
3675  %1 = bitcast <4 x i64> %__b to <4 x i64>
3676  %2 = icmp eq <4 x i64> %0, %1
3677  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0-3 of the 8-element mask, matching the 4 lanes.
3678  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3679  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 select the masked compare bits from %4; indices 4-7 select
  ; lanes of the zeroinitializer operand, so result lanes 4..7 are all zero.
3680  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3681  %6 = bitcast <8 x i1> %5 to i8
3682  ret i8 %6
3683}
3684
; Masked equality compare of a <4 x i64> register operand against a full
; <4 x i64> vector loaded from %__b. The <4 x i1> compare result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>), then widened to <8 x i1> with
; zero padding and returned as an i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3685define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3686; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3687; VLX:       # %bb.0: # %entry
3688; VLX-NEXT:    kmovd %edi, %k1
3689; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3690; VLX-NEXT:    kmovd %k0, %eax
3691; VLX-NEXT:    # kill: def $al killed $al killed $eax
3692; VLX-NEXT:    vzeroupper
3693; VLX-NEXT:    retq
3694;
3695; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3696; NoVLX:       # %bb.0: # %entry
3697; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3698; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
3699; NoVLX-NEXT:    kmovw %edi, %k1
3700; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3701; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3702; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3703; NoVLX-NEXT:    kmovw %k0, %eax
3704; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3705; NoVLX-NEXT:    vzeroupper
3706; NoVLX-NEXT:    retq
3707entry:
3708  %0 = bitcast <4 x i64> %__a to <4 x i64>
3709  %load = load <4 x i64>, ptr %__b
3710  %1 = bitcast <4 x i64> %load to <4 x i64>
3711  %2 = icmp eq <4 x i64> %0, %1
3712  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0-3 of the 8-element mask, matching the 4 lanes.
3713  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3714  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 select the masked compare bits from %4; indices 4-7 select
  ; lanes of the zeroinitializer operand, so result lanes 4..7 are all zero.
3715  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3716  %6 = bitcast <8 x i1> %5 to i8
3717  ret i8 %6
3718}
3719
3720
; Equality compare of a <4 x i64> register operand against a single i64 loaded
; from %__b and splatted to all four lanes (the broadcast-from-memory form).
; The <4 x i1> result is widened to <8 x i1> with zero padding and returned as
; an i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3721define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3722; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3723; VLX:       # %bb.0: # %entry
3724; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3725; VLX-NEXT:    kmovd %k0, %eax
3726; VLX-NEXT:    # kill: def $al killed $al killed $eax
3727; VLX-NEXT:    vzeroupper
3728; VLX-NEXT:    retq
3729;
3730; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3731; NoVLX:       # %bb.0: # %entry
3732; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3733; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3734; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3735; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3736; NoVLX-NEXT:    kmovw %k0, %eax
3737; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3738; NoVLX-NEXT:    vzeroupper
3739; NoVLX-NEXT:    retq
3740entry:
3741  %0 = bitcast <4 x i64> %__a to <4 x i64>
3742  %load = load i64, ptr %__b
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every
  ; lane.
3743  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3744  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3745  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..7 are all zero.
3746  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3747  %4 = bitcast <8 x i1> %3 to i8
3748  ret i8 %4
3749}
3750
; Masked equality compare of a <4 x i64> register operand against a single i64
; loaded from %__b and splatted to all four lanes (the broadcast-from-memory
; form). The <4 x i1> compare result is ANDed with elements 0-3 of %__u (viewed
; as <8 x i1>), then widened to <8 x i1> with zero padding and returned as an
; i8 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3751define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3752; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3753; VLX:       # %bb.0: # %entry
3754; VLX-NEXT:    kmovd %edi, %k1
3755; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3756; VLX-NEXT:    kmovd %k0, %eax
3757; VLX-NEXT:    # kill: def $al killed $al killed $eax
3758; VLX-NEXT:    vzeroupper
3759; VLX-NEXT:    retq
3760;
3761; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3762; NoVLX:       # %bb.0: # %entry
3763; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3764; NoVLX-NEXT:    kmovw %edi, %k1
3765; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3766; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3767; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3768; NoVLX-NEXT:    kmovw %k0, %eax
3769; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3770; NoVLX-NEXT:    vzeroupper
3771; NoVLX-NEXT:    retq
3772entry:
3773  %0 = bitcast <4 x i64> %__a to <4 x i64>
3774  %load = load i64, ptr %__b
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every
  ; lane.
3775  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3776  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3777  %2 = icmp eq <4 x i64> %0, %1
3778  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0-3 of the 8-element mask, matching the 4 lanes.
3779  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3780  %4 = and <4 x i1> %extract.i, %2
  ; Mask indices 0-3 select the masked compare bits from %4; indices 4-7 select
  ; lanes of the zeroinitializer operand, so result lanes 4..7 are all zero.
3781  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3782  %6 = bitcast <8 x i1> %5 to i8
3783  ret i8 %6
3784}
3785
3786
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to <16 x i1> with zero padding and returned as an i16 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3787define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3788; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3789; VLX:       # %bb.0: # %entry
3790; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
3791; VLX-NEXT:    kmovd %k0, %eax
3792; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3793; VLX-NEXT:    vzeroupper
3794; VLX-NEXT:    retq
3795;
3796; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3797; NoVLX:       # %bb.0: # %entry
3798; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3799; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3800; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3801; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3802; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3803; NoVLX-NEXT:    kmovw %k0, %eax
3804; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3805; NoVLX-NEXT:    vzeroupper
3806; NoVLX-NEXT:    retq
3807entry:
3808  %0 = bitcast <4 x i64> %__a to <4 x i64>
3809  %1 = bitcast <4 x i64> %__b to <4 x i64>
3810  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..15 are all zero.
3811  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3812  %4 = bitcast <16 x i1> %3 to i16
3813  ret i16 %4
3814}
3815
; Equality compare of a <4 x i64> register operand against a full <4 x i64>
; vector loaded from %__b. The <4 x i1> result is widened to <16 x i1> with
; zero padding and returned as an i16 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3816define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3817; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3818; VLX:       # %bb.0: # %entry
3819; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
3820; VLX-NEXT:    kmovd %k0, %eax
3821; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3822; VLX-NEXT:    vzeroupper
3823; VLX-NEXT:    retq
3824;
3825; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3826; NoVLX:       # %bb.0: # %entry
3827; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3828; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
3829; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3830; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3831; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3832; NoVLX-NEXT:    kmovw %k0, %eax
3833; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3834; NoVLX-NEXT:    vzeroupper
3835; NoVLX-NEXT:    retq
3836entry:
3837  %0 = bitcast <4 x i64> %__a to <4 x i64>
3838  %load = load <4 x i64>, ptr %__b
3839  %1 = bitcast <4 x i64> %load to <4 x i64>
3840  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..15 are all zero.
3841  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3842  %4 = bitcast <16 x i1> %3 to i16
3843  ret i16 %4
3844}
3845
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; compare result is ANDed with elements 0-3 of %__u (viewed as <8 x i1>), then
; widened to <16 x i1> with zero padding and returned as an i16 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3846define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3847; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3848; VLX:       # %bb.0: # %entry
3849; VLX-NEXT:    kmovd %edi, %k1
3850; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3851; VLX-NEXT:    kmovd %k0, %eax
3852; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3853; VLX-NEXT:    vzeroupper
3854; VLX-NEXT:    retq
3855;
3856; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3857; NoVLX:       # %bb.0: # %entry
3858; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3859; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3860; NoVLX-NEXT:    kmovw %edi, %k1
3861; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3862; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3863; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3864; NoVLX-NEXT:    kmovw %k0, %eax
3865; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3866; NoVLX-NEXT:    vzeroupper
3867; NoVLX-NEXT:    retq
3868entry:
3869  %0 = bitcast <4 x i64> %__a to <4 x i64>
3870  %1 = bitcast <4 x i64> %__b to <4 x i64>
3871  %2 = icmp eq <4 x i64> %0, %1
3872  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0-3 of the 8-element mask, matching the 4 lanes.
3873  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3874  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 select the masked compare bits from %4; indices 4-7 select
  ; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
3875  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3876  %6 = bitcast <16 x i1> %5 to i16
3877  ret i16 %6
3878}
3879
; Masked equality compare of a <4 x i64> register operand against a full
; <4 x i64> vector loaded from %__b. The <4 x i1> compare result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>), then widened to <16 x i1> with
; zero padding and returned as an i16 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3880define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3881; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3882; VLX:       # %bb.0: # %entry
3883; VLX-NEXT:    kmovd %edi, %k1
3884; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3885; VLX-NEXT:    kmovd %k0, %eax
3886; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3887; VLX-NEXT:    vzeroupper
3888; VLX-NEXT:    retq
3889;
3890; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3891; NoVLX:       # %bb.0: # %entry
3892; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3893; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
3894; NoVLX-NEXT:    kmovw %edi, %k1
3895; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3896; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3897; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3898; NoVLX-NEXT:    kmovw %k0, %eax
3899; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3900; NoVLX-NEXT:    vzeroupper
3901; NoVLX-NEXT:    retq
3902entry:
3903  %0 = bitcast <4 x i64> %__a to <4 x i64>
3904  %load = load <4 x i64>, ptr %__b
3905  %1 = bitcast <4 x i64> %load to <4 x i64>
3906  %2 = icmp eq <4 x i64> %0, %1
3907  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only elements 0-3 of the 8-element mask, matching the 4 lanes.
3908  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3909  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 select the masked compare bits from %4; indices 4-7 select
  ; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
3910  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3911  %6 = bitcast <16 x i1> %5 to i16
3912  ret i16 %6
3913}
3914
3915
; Equality compare of a <4 x i64> register operand against a single i64 loaded
; from %__b and splatted to all four lanes (the broadcast-from-memory form).
; The <4 x i1> result is widened to <16 x i1> with zero padding and returned as
; an i16 bitmask.
; The VLX/NoVLX assertions below are autogenerated (see the NOTE at the top of
; this file).
3916define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3917; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3918; VLX:       # %bb.0: # %entry
3919; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3920; VLX-NEXT:    kmovd %k0, %eax
3921; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3922; VLX-NEXT:    vzeroupper
3923; VLX-NEXT:    retq
3924;
3925; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3926; NoVLX:       # %bb.0: # %entry
3927; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3928; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3929; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3930; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3931; NoVLX-NEXT:    kmovw %k0, %eax
3932; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3933; NoVLX-NEXT:    vzeroupper
3934; NoVLX-NEXT:    retq
3935entry:
3936  %0 = bitcast <4 x i64> %__a to <4 x i64>
3937  %load = load i64, ptr %__b
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every
  ; lane.
3938  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3939  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3940  %2 = icmp eq <4 x i64> %0, %1
  ; Mask indices 0-3 select the compare bits from %2; indices 4-7 select lanes
  ; of the zeroinitializer operand, so result lanes 4..15 are all zero.
3941  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3942  %4 = bitcast <16 x i1> %3 to i16
3943  ret i16 %4
3944}
3945
; Masked equality compare of the four i64 lanes of %__a against one i64 loaded
; from %__b and splatted to all lanes. Lane results are ANDed with bits 0-3 of
; %__u, padded with zeros to 16 lanes, and returned as an i16 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {1to4} broadcast operand and a {%k1}
; write-mask; NoVLX checks expect a zmm compare with {1to8}, then
; kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive.
3946define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3947; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3948; VLX:       # %bb.0: # %entry
3949; VLX-NEXT:    kmovd %edi, %k1
3950; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3951; VLX-NEXT:    kmovd %k0, %eax
3952; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3953; VLX-NEXT:    vzeroupper
3954; VLX-NEXT:    retq
3955;
3956; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3957; NoVLX:       # %bb.0: # %entry
3958; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3959; NoVLX-NEXT:    kmovw %edi, %k1
3960; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3961; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3962; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3963; NoVLX-NEXT:    kmovw %k0, %eax
3964; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3965; NoVLX-NEXT:    vzeroupper
3966; NoVLX-NEXT:    retq
3967entry:
3968  %0 = bitcast <4 x i64> %__a to <4 x i64>
3969  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all four lanes.
3970  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3971  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3972  %2 = icmp eq <4 x i64> %0, %1
3973  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
3974  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3975  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
3976  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3977  %6 = bitcast <16 x i1> %5 to i16
3978  ret i16 %6
3979}
3980
3981
; Equality compare of the four i64 lanes of %__a and %__b. The four results
; are padded with zeros to 32 lanes and returned as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq into %k0; NoVLX checks expect the compare
; widened to zmm, then kshiftlw/kshiftrw $12 so that only the low 4 mask bits
; survive.
3982define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3983; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3984; VLX:       # %bb.0: # %entry
3985; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
3986; VLX-NEXT:    kmovd %k0, %eax
3987; VLX-NEXT:    vzeroupper
3988; VLX-NEXT:    retq
3989;
3990; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3991; NoVLX:       # %bb.0: # %entry
3992; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3993; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3994; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3995; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3996; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3997; NoVLX-NEXT:    kmovw %k0, %eax
3998; NoVLX-NEXT:    vzeroupper
3999; NoVLX-NEXT:    retq
4000entry:
4001  %0 = bitcast <4 x i64> %__a to <4 x i64>
4002  %1 = bitcast <4 x i64> %__b to <4 x i64>
4003  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4004  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4005  %4 = bitcast <32 x i1> %3 to i32
4006  ret i32 %4
4007}
4008
; Equality compare of the four i64 lanes of %__a against a <4 x i64> loaded
; from %__b. The four results are padded with zeros to 32 lanes and returned
; as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq with the load folded; NoVLX checks expect a
; separate ymm load, the compare widened to zmm, then kshiftlw/kshiftrw $12 so
; that only the low 4 mask bits survive.
4009define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4010; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4011; VLX:       # %bb.0: # %entry
4012; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
4013; VLX-NEXT:    kmovd %k0, %eax
4014; VLX-NEXT:    vzeroupper
4015; VLX-NEXT:    retq
4016;
4017; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4018; NoVLX:       # %bb.0: # %entry
4019; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4020; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
4021; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4022; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4023; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4024; NoVLX-NEXT:    kmovw %k0, %eax
4025; NoVLX-NEXT:    vzeroupper
4026; NoVLX-NEXT:    retq
4027entry:
4028  %0 = bitcast <4 x i64> %__a to <4 x i64>
4029  %load = load <4 x i64>, ptr %__b
4030  %1 = bitcast <4 x i64> %load to <4 x i64>
4031  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4032  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4033  %4 = bitcast <32 x i1> %3 to i32
4034  ret i32 %4
4035}
4036
; Masked equality compare of the four i64 lanes of %__a and %__b. Lane results
; are ANDed with bits 0-3 of %__u, padded with zeros to 32 lanes, and returned
; as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {%k1} write-mask; NoVLX checks
; expect the compare widened to zmm, then kshiftlw/kshiftrw $12 so that only
; the low 4 mask bits survive.
4037define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4038; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4039; VLX:       # %bb.0: # %entry
4040; VLX-NEXT:    kmovd %edi, %k1
4041; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4042; VLX-NEXT:    kmovd %k0, %eax
4043; VLX-NEXT:    vzeroupper
4044; VLX-NEXT:    retq
4045;
4046; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4047; NoVLX:       # %bb.0: # %entry
4048; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4049; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4050; NoVLX-NEXT:    kmovw %edi, %k1
4051; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4052; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4053; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4054; NoVLX-NEXT:    kmovw %k0, %eax
4055; NoVLX-NEXT:    vzeroupper
4056; NoVLX-NEXT:    retq
4057entry:
4058  %0 = bitcast <4 x i64> %__a to <4 x i64>
4059  %1 = bitcast <4 x i64> %__b to <4 x i64>
4060  %2 = icmp eq <4 x i64> %0, %1
4061  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4062  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4063  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4064  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4065  %6 = bitcast <32 x i1> %5 to i32
4066  ret i32 %6
4067}
4068
; Masked equality compare of the four i64 lanes of %__a against a <4 x i64>
; loaded from %__b. Lane results are ANDed with bits 0-3 of %__u, padded with
; zeros to 32 lanes, and returned as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq with the load folded and a {%k1} write-mask;
; NoVLX checks expect a separate ymm load, the compare widened to zmm, then
; kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive.
4069define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4070; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4071; VLX:       # %bb.0: # %entry
4072; VLX-NEXT:    kmovd %edi, %k1
4073; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4074; VLX-NEXT:    kmovd %k0, %eax
4075; VLX-NEXT:    vzeroupper
4076; VLX-NEXT:    retq
4077;
4078; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4079; NoVLX:       # %bb.0: # %entry
4080; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4081; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
4082; NoVLX-NEXT:    kmovw %edi, %k1
4083; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4084; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4085; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4086; NoVLX-NEXT:    kmovw %k0, %eax
4087; NoVLX-NEXT:    vzeroupper
4088; NoVLX-NEXT:    retq
4089entry:
4090  %0 = bitcast <4 x i64> %__a to <4 x i64>
4091  %load = load <4 x i64>, ptr %__b
4092  %1 = bitcast <4 x i64> %load to <4 x i64>
4093  %2 = icmp eq <4 x i64> %0, %1
4094  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4095  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4096  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4097  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4098  %6 = bitcast <32 x i1> %5 to i32
4099  ret i32 %6
4100}
4101
4102
; Equality compare of the four i64 lanes of %__a against one i64 loaded from
; %__b and splatted to all lanes. The four results are padded with zeros to
; 32 lanes and returned as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {1to4} broadcast memory operand;
; NoVLX checks expect a zmm compare with {1to8}, then kshiftlw/kshiftrw $12
; so that only the low 4 mask bits survive.
4103define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4104; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4105; VLX:       # %bb.0: # %entry
4106; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4107; VLX-NEXT:    kmovd %k0, %eax
4108; VLX-NEXT:    vzeroupper
4109; VLX-NEXT:    retq
4110;
4111; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4112; NoVLX:       # %bb.0: # %entry
4113; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4114; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4115; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4116; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4117; NoVLX-NEXT:    kmovw %k0, %eax
4118; NoVLX-NEXT:    vzeroupper
4119; NoVLX-NEXT:    retq
4120entry:
4121  %0 = bitcast <4 x i64> %__a to <4 x i64>
4122  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all four lanes.
4123  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4124  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4125  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4126  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4127  %4 = bitcast <32 x i1> %3 to i32
4128  ret i32 %4
4129}
4130
; Masked equality compare of the four i64 lanes of %__a against one i64 loaded
; from %__b and splatted to all lanes. Lane results are ANDed with bits 0-3 of
; %__u, padded with zeros to 32 lanes, and returned as an i32 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {1to4} broadcast operand and a {%k1}
; write-mask; NoVLX checks expect a zmm compare with {1to8}, then
; kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive.
4131define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4132; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4133; VLX:       # %bb.0: # %entry
4134; VLX-NEXT:    kmovd %edi, %k1
4135; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4136; VLX-NEXT:    kmovd %k0, %eax
4137; VLX-NEXT:    vzeroupper
4138; VLX-NEXT:    retq
4139;
4140; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4141; NoVLX:       # %bb.0: # %entry
4142; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4143; NoVLX-NEXT:    kmovw %edi, %k1
4144; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4145; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4146; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4147; NoVLX-NEXT:    kmovw %k0, %eax
4148; NoVLX-NEXT:    vzeroupper
4149; NoVLX-NEXT:    retq
4150entry:
4151  %0 = bitcast <4 x i64> %__a to <4 x i64>
4152  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all four lanes.
4153  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4154  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4155  %2 = icmp eq <4 x i64> %0, %1
4156  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4157  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4158  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4159  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4160  %6 = bitcast <32 x i1> %5 to i32
4161  ret i32 %6
4162}
4163
4164
; Equality compare of the four i64 lanes of %__a and %__b. The four results
; are padded with zeros to 64 lanes and returned as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq read back with kmovq; NoVLX checks expect
; the compare widened to zmm, kshiftlw/kshiftrw $12 so that only the low 4
; mask bits survive, and the mask read with kmovw into %eax.
4165define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4166; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4167; VLX:       # %bb.0: # %entry
4168; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
4169; VLX-NEXT:    kmovq %k0, %rax
4170; VLX-NEXT:    vzeroupper
4171; VLX-NEXT:    retq
4172;
4173; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4174; NoVLX:       # %bb.0: # %entry
4175; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4176; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4177; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4178; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4179; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4180; NoVLX-NEXT:    kmovw %k0, %eax
4181; NoVLX-NEXT:    vzeroupper
4182; NoVLX-NEXT:    retq
4183entry:
4184  %0 = bitcast <4 x i64> %__a to <4 x i64>
4185  %1 = bitcast <4 x i64> %__b to <4 x i64>
4186  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4187  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4188  %4 = bitcast <64 x i1> %3 to i64
4189  ret i64 %4
4190}
4191
; Equality compare of the four i64 lanes of %__a against a <4 x i64> loaded
; from %__b. The four results are padded with zeros to 64 lanes and returned
; as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq with the load folded, read back with kmovq;
; NoVLX checks expect a separate ymm load, the compare widened to zmm,
; kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive, and the mask
; read with kmovw into %eax.
4192define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4193; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4194; VLX:       # %bb.0: # %entry
4195; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
4196; VLX-NEXT:    kmovq %k0, %rax
4197; VLX-NEXT:    vzeroupper
4198; VLX-NEXT:    retq
4199;
4200; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4201; NoVLX:       # %bb.0: # %entry
4202; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4203; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
4204; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4205; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4206; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4207; NoVLX-NEXT:    kmovw %k0, %eax
4208; NoVLX-NEXT:    vzeroupper
4209; NoVLX-NEXT:    retq
4210entry:
4211  %0 = bitcast <4 x i64> %__a to <4 x i64>
4212  %load = load <4 x i64>, ptr %__b
4213  %1 = bitcast <4 x i64> %load to <4 x i64>
4214  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4215  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4216  %4 = bitcast <64 x i1> %3 to i64
4217  ret i64 %4
4218}
4219
; Masked equality compare of the four i64 lanes of %__a and %__b. Lane results
; are ANDed with bits 0-3 of %__u, padded with zeros to 64 lanes, and returned
; as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {%k1} write-mask, read back with
; kmovq; NoVLX checks expect the compare widened to zmm, kshiftlw/kshiftrw $12
; so that only the low 4 mask bits survive, and the mask read with kmovw into
; %eax.
4220define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4221; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4222; VLX:       # %bb.0: # %entry
4223; VLX-NEXT:    kmovd %edi, %k1
4224; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4225; VLX-NEXT:    kmovq %k0, %rax
4226; VLX-NEXT:    vzeroupper
4227; VLX-NEXT:    retq
4228;
4229; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4230; NoVLX:       # %bb.0: # %entry
4231; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4232; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4233; NoVLX-NEXT:    kmovw %edi, %k1
4234; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4235; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4236; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4237; NoVLX-NEXT:    kmovw %k0, %eax
4238; NoVLX-NEXT:    vzeroupper
4239; NoVLX-NEXT:    retq
4240entry:
4241  %0 = bitcast <4 x i64> %__a to <4 x i64>
4242  %1 = bitcast <4 x i64> %__b to <4 x i64>
4243  %2 = icmp eq <4 x i64> %0, %1
4244  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4245  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4246  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4247  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4248  %6 = bitcast <64 x i1> %5 to i64
4249  ret i64 %6
4250}
4251
; Masked equality compare of the four i64 lanes of %__a against a <4 x i64>
; loaded from %__b. Lane results are ANDed with bits 0-3 of %__u, padded with
; zeros to 64 lanes, and returned as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq with the load folded and a {%k1} write-mask,
; read back with kmovq; NoVLX checks expect a separate ymm load, the compare
; widened to zmm, kshiftlw/kshiftrw $12 so that only the low 4 mask bits
; survive, and the mask read with kmovw into %eax.
4252define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4253; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4254; VLX:       # %bb.0: # %entry
4255; VLX-NEXT:    kmovd %edi, %k1
4256; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4257; VLX-NEXT:    kmovq %k0, %rax
4258; VLX-NEXT:    vzeroupper
4259; VLX-NEXT:    retq
4260;
4261; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4262; NoVLX:       # %bb.0: # %entry
4263; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4264; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
4265; NoVLX-NEXT:    kmovw %edi, %k1
4266; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4267; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4268; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4269; NoVLX-NEXT:    kmovw %k0, %eax
4270; NoVLX-NEXT:    vzeroupper
4271; NoVLX-NEXT:    retq
4272entry:
4273  %0 = bitcast <4 x i64> %__a to <4 x i64>
4274  %load = load <4 x i64>, ptr %__b
4275  %1 = bitcast <4 x i64> %load to <4 x i64>
4276  %2 = icmp eq <4 x i64> %0, %1
4277  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4278  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4279  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4280  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4281  %6 = bitcast <64 x i1> %5 to i64
4282  ret i64 %6
4283}
4284
4285
; Equality compare of the four i64 lanes of %__a against one i64 loaded from
; %__b and splatted to all lanes. The four results are padded with zeros to
; 64 lanes and returned as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {1to4} broadcast memory operand,
; read back with kmovq; NoVLX checks expect a zmm compare with {1to8},
; kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive, and the mask
; read with kmovw into %eax.
4286define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4287; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4288; VLX:       # %bb.0: # %entry
4289; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4290; VLX-NEXT:    kmovq %k0, %rax
4291; VLX-NEXT:    vzeroupper
4292; VLX-NEXT:    retq
4293;
4294; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4295; NoVLX:       # %bb.0: # %entry
4296; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4297; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4298; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4299; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4300; NoVLX-NEXT:    kmovw %k0, %eax
4301; NoVLX-NEXT:    vzeroupper
4302; NoVLX-NEXT:    retq
4303entry:
4304  %0 = bitcast <4 x i64> %__a to <4 x i64>
4305  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all four lanes.
4306  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4307  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4308  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4309  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4310  %4 = bitcast <64 x i1> %3 to i64
4311  ret i64 %4
4312}
4313
; Masked equality compare of the four i64 lanes of %__a against one i64 loaded
; from %__b and splatted to all lanes. Lane results are ANDed with bits 0-3 of
; %__u, padded with zeros to 64 lanes, and returned as an i64 bitmask.
; VLX checks expect a ymm vpcmpeqq with a {1to4} broadcast operand and a {%k1}
; write-mask, read back with kmovq; NoVLX checks expect a zmm compare with
; {1to8}, kshiftlw/kshiftrw $12 so that only the low 4 mask bits survive, and
; the mask read with kmovw into %eax.
4314define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4315; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4316; VLX:       # %bb.0: # %entry
4317; VLX-NEXT:    kmovd %edi, %k1
4318; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4319; VLX-NEXT:    kmovq %k0, %rax
4320; VLX-NEXT:    vzeroupper
4321; VLX-NEXT:    retq
4322;
4323; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4324; NoVLX:       # %bb.0: # %entry
4325; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4326; NoVLX-NEXT:    kmovw %edi, %k1
4327; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4328; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4329; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4330; NoVLX-NEXT:    kmovw %k0, %eax
4331; NoVLX-NEXT:    vzeroupper
4332; NoVLX-NEXT:    retq
4333entry:
4334  %0 = bitcast <4 x i64> %__a to <4 x i64>
4335  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all four lanes.
4336  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4337  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4338  %2 = icmp eq <4 x i64> %0, %1
4339  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only bits 0-3 of %__u, one per compared lane.
4340  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4341  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so every lane past the first four is zero.
4342  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4343  %6 = bitcast <64 x i1> %5 to i64
4344  ret i64 %6
4345}
4346
4347
; Equality compare of the eight i64 lanes of %__a and %__b. The eight results
; are padded with zeros to 16 lanes and returned as an i16 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq into %k0; they differ only
; in reading the mask with kmovd (VLX) versus kmovw (NoVLX).
4348define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4349; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4350; VLX:       # %bb.0: # %entry
4351; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4352; VLX-NEXT:    kmovd %k0, %eax
4353; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4354; VLX-NEXT:    vzeroupper
4355; VLX-NEXT:    retq
4356;
4357; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4358; NoVLX:       # %bb.0: # %entry
4359; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4360; NoVLX-NEXT:    kmovw %k0, %eax
4361; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4362; NoVLX-NEXT:    vzeroupper
4363; NoVLX-NEXT:    retq
4364entry:
4365  %0 = bitcast <8 x i64> %__a to <8 x i64>
4366  %1 = bitcast <8 x i64> %__b to <8 x i64>
4367  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4368  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4369  %4 = bitcast <16 x i1> %3 to i16
4370  ret i16 %4
4371}
4372
; Equality compare of the eight i64 lanes of %__a against an <8 x i64> loaded
; from %__b. The eight results are padded with zeros to 16 lanes and returned
; as an i16 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with the load folded; they
; differ only in reading the mask with kmovd (VLX) versus kmovw (NoVLX).
4373define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4374; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4375; VLX:       # %bb.0: # %entry
4376; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4377; VLX-NEXT:    kmovd %k0, %eax
4378; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4379; VLX-NEXT:    vzeroupper
4380; VLX-NEXT:    retq
4381;
4382; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4383; NoVLX:       # %bb.0: # %entry
4384; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4385; NoVLX-NEXT:    kmovw %k0, %eax
4386; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4387; NoVLX-NEXT:    vzeroupper
4388; NoVLX-NEXT:    retq
4389entry:
4390  %0 = bitcast <8 x i64> %__a to <8 x i64>
4391  %load = load <8 x i64>, ptr %__b
4392  %1 = bitcast <8 x i64> %load to <8 x i64>
4393  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4394  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4395  %4 = bitcast <16 x i1> %3 to i16
4396  ret i16 %4
4397}
4398
; Masked equality compare of the eight i64 lanes of %__a and %__b. Lane
; results are ANDed with the eight bits of %__u, padded with zeros to 16
; lanes, and returned as an i16 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with a {%k1} write-mask;
; they differ only in using kmovd (VLX) versus kmovw (NoVLX) for the mask
; moves.
4399define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4400; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4401; VLX:       # %bb.0: # %entry
4402; VLX-NEXT:    kmovd %edi, %k1
4403; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4404; VLX-NEXT:    kmovd %k0, %eax
4405; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4406; VLX-NEXT:    vzeroupper
4407; VLX-NEXT:    retq
4408;
4409; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4410; NoVLX:       # %bb.0: # %entry
4411; NoVLX-NEXT:    kmovw %edi, %k1
4412; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4413; NoVLX-NEXT:    kmovw %k0, %eax
4414; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4415; NoVLX-NEXT:    vzeroupper
4416; NoVLX-NEXT:    retq
4417entry:
4418  %0 = bitcast <8 x i64> %__a to <8 x i64>
4419  %1 = bitcast <8 x i64> %__b to <8 x i64>
4420  %2 = icmp eq <8 x i64> %0, %1
  ; Reinterpret %__u as eight i1 lanes and gate each compare result with its bit.
4421  %3 = bitcast i8 %__u to <8 x i1>
4422  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4423  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4424  %6 = bitcast <16 x i1> %5 to i16
4425  ret i16 %6
4426}
4427
; Masked equality compare of the eight i64 lanes of %__a against an <8 x i64>
; loaded from %__b. Lane results are ANDed with the eight bits of %__u, padded
; with zeros to 16 lanes, and returned as an i16 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with the load folded and a
; {%k1} write-mask; they differ only in using kmovd (VLX) versus kmovw (NoVLX)
; for the mask moves.
4428define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4429; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4430; VLX:       # %bb.0: # %entry
4431; VLX-NEXT:    kmovd %edi, %k1
4432; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4433; VLX-NEXT:    kmovd %k0, %eax
4434; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4435; VLX-NEXT:    vzeroupper
4436; VLX-NEXT:    retq
4437;
4438; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4439; NoVLX:       # %bb.0: # %entry
4440; NoVLX-NEXT:    kmovw %edi, %k1
4441; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4442; NoVLX-NEXT:    kmovw %k0, %eax
4443; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4444; NoVLX-NEXT:    vzeroupper
4445; NoVLX-NEXT:    retq
4446entry:
4447  %0 = bitcast <8 x i64> %__a to <8 x i64>
4448  %load = load <8 x i64>, ptr %__b
4449  %1 = bitcast <8 x i64> %load to <8 x i64>
4450  %2 = icmp eq <8 x i64> %0, %1
  ; Reinterpret %__u as eight i1 lanes and gate each compare result with its bit.
4451  %3 = bitcast i8 %__u to <8 x i1>
4452  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4453  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4454  %6 = bitcast <16 x i1> %5 to i16
4455  ret i16 %6
4456}
4457
4458
; Equality compare of the eight i64 lanes of %__a against one i64 loaded from
; %__b and splatted to all lanes. The eight results are padded with zeros to
; 16 lanes and returned as an i16 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with a {1to8} broadcast
; memory operand; they differ only in reading the mask with kmovd (VLX) versus
; kmovw (NoVLX).
4459define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4460; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4461; VLX:       # %bb.0: # %entry
4462; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4463; VLX-NEXT:    kmovd %k0, %eax
4464; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4465; VLX-NEXT:    vzeroupper
4466; VLX-NEXT:    retq
4467;
4468; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4469; NoVLX:       # %bb.0: # %entry
4470; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4471; NoVLX-NEXT:    kmovw %k0, %eax
4472; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4473; NoVLX-NEXT:    vzeroupper
4474; NoVLX-NEXT:    retq
4475entry:
4476  %0 = bitcast <8 x i64> %__a to <8 x i64>
4477  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all eight lanes.
4478  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4479  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4480  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4481  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4482  %4 = bitcast <16 x i1> %3 to i16
4483  ret i16 %4
4484}
4485
; Masked equality compare of the eight i64 lanes of %__a against one i64
; loaded from %__b and splatted to all lanes. Lane results are ANDed with the
; eight bits of %__u, padded with zeros to 16 lanes, and returned as an i16
; bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with a {1to8} broadcast
; operand and a {%k1} write-mask; they differ only in using kmovd (VLX) versus
; kmovw (NoVLX) for the mask moves.
4486define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4487; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4488; VLX:       # %bb.0: # %entry
4489; VLX-NEXT:    kmovd %edi, %k1
4490; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4491; VLX-NEXT:    kmovd %k0, %eax
4492; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4493; VLX-NEXT:    vzeroupper
4494; VLX-NEXT:    retq
4495;
4496; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4497; NoVLX:       # %bb.0: # %entry
4498; NoVLX-NEXT:    kmovw %edi, %k1
4499; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4500; NoVLX-NEXT:    kmovw %k0, %eax
4501; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4502; NoVLX-NEXT:    vzeroupper
4503; NoVLX-NEXT:    retq
4504entry:
4505  %0 = bitcast <8 x i64> %__a to <8 x i64>
4506  %load = load i64, ptr %__b
  ; Broadcast the scalar loaded from %__b to all eight lanes.
4507  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4508  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4509  %2 = icmp eq <8 x i64> %0, %1
  ; Reinterpret %__u as eight i1 lanes and gate each compare result with its bit.
4510  %3 = bitcast i8 %__u to <8 x i1>
4511  %4 = and <8 x i1> %3, %2
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4512  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4513  %6 = bitcast <16 x i1> %5 to i16
4514  ret i16 %6
4515}
4516
4517
; Equality compare of the eight i64 lanes of %__a and %__b. The eight results
; are padded with zeros to 32 lanes and returned as an i32 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq into %k0; they differ only
; in reading the mask with kmovd (VLX) versus kmovw (NoVLX).
4518define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4519; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4520; VLX:       # %bb.0: # %entry
4521; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4522; VLX-NEXT:    kmovd %k0, %eax
4523; VLX-NEXT:    vzeroupper
4524; VLX-NEXT:    retq
4525;
4526; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4527; NoVLX:       # %bb.0: # %entry
4528; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4529; NoVLX-NEXT:    kmovw %k0, %eax
4530; NoVLX-NEXT:    vzeroupper
4531; NoVLX-NEXT:    retq
4532entry:
4533  %0 = bitcast <8 x i64> %__a to <8 x i64>
4534  %1 = bitcast <8 x i64> %__b to <8 x i64>
4535  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4536  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4537  %4 = bitcast <32 x i1> %3 to i32
4538  ret i32 %4
4539}
4540
; Equality compare of the eight i64 lanes of %__a against an <8 x i64> loaded
; from %__b. The eight results are padded with zeros to 32 lanes and returned
; as an i32 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with the load folded; they
; differ only in reading the mask with kmovd (VLX) versus kmovw (NoVLX).
4541define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4542; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4543; VLX:       # %bb.0: # %entry
4544; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4545; VLX-NEXT:    kmovd %k0, %eax
4546; VLX-NEXT:    vzeroupper
4547; VLX-NEXT:    retq
4548;
4549; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4550; NoVLX:       # %bb.0: # %entry
4551; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4552; NoVLX-NEXT:    kmovw %k0, %eax
4553; NoVLX-NEXT:    vzeroupper
4554; NoVLX-NEXT:    retq
4555entry:
4556  %0 = bitcast <8 x i64> %__a to <8 x i64>
4557  %load = load <8 x i64>, ptr %__b
4558  %1 = bitcast <8 x i64> %load to <8 x i64>
4559  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4560  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4561  %4 = bitcast <32 x i1> %3 to i32
4562  ret i32 %4
4563}
4564
; Masked equality compare of the eight i64 lanes of %__a and %__b. Lane
; results are ANDed with the eight bits of %__u, padded with zeros to 32
; lanes, and returned as an i32 bitmask.
; Both check prefixes expect a single zmm vpcmpeqq with a {%k1} write-mask;
; they differ only in using kmovd (VLX) versus kmovw (NoVLX) for the mask
; moves.
4565define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4566; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4567; VLX:       # %bb.0: # %entry
4568; VLX-NEXT:    kmovd %edi, %k1
4569; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4570; VLX-NEXT:    kmovd %k0, %eax
4571; VLX-NEXT:    vzeroupper
4572; VLX-NEXT:    retq
4573;
4574; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4575; NoVLX:       # %bb.0: # %entry
4576; NoVLX-NEXT:    kmovw %edi, %k1
4577; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4578; NoVLX-NEXT:    kmovw %k0, %eax
4579; NoVLX-NEXT:    vzeroupper
4580; NoVLX-NEXT:    retq
4581entry:
4582  %0 = bitcast <8 x i64> %__a to <8 x i64>
4583  %1 = bitcast <8 x i64> %__b to <8 x i64>
4584  %2 = icmp eq <8 x i64> %0, %1
  ; Reinterpret %__u as eight i1 lanes and gate each compare result with its bit.
4585  %3 = bitcast i8 %__u to <8 x i1>
4586  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select from the zeroinitializer operand, so every lane past the first eight is zero.
4587  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4588  %6 = bitcast <32 x i1> %5 to i32
4589  ret i32 %6
4590}
4591
; Masked qword equality compare of %__a against an <8 x i64> loaded from
; %__b. The result is ANDed with %__u (as <8 x i1>), padded with zero lanes to
; 32 lanes and returned as an i32 bitmask. Both runs expect a folded load and
; a {%k1} write-mask on vpcmpeqq.
4592define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4593; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4594; VLX:       # %bb.0: # %entry
4595; VLX-NEXT:    kmovd %edi, %k1
4596; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4597; VLX-NEXT:    kmovd %k0, %eax
4598; VLX-NEXT:    vzeroupper
4599; VLX-NEXT:    retq
4600;
4601; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4602; NoVLX:       # %bb.0: # %entry
4603; NoVLX-NEXT:    kmovw %edi, %k1
4604; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4605; NoVLX-NEXT:    kmovw %k0, %eax
4606; NoVLX-NEXT:    vzeroupper
4607; NoVLX-NEXT:    retq
4608entry:
4609  %0 = bitcast <8 x i64> %__a to <8 x i64>
4610  %load = load <8 x i64>, ptr %__b
4611  %1 = bitcast <8 x i64> %load to <8 x i64>
4612  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied 8-bit mask lane by lane.
4613  %3 = bitcast i8 %__u to <8 x i1>
4614  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result lanes 8-31 are all zero.
4615  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4616  %6 = bitcast <32 x i1> %5 to i32
4617  ret i32 %6
4618}
4619
4620
; Unmasked qword equality compare of %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes. The result is padded with zero lanes to
; 32 lanes and returned as an i32 bitmask. Both runs expect the splat to
; become a {1to8} broadcast memory operand.
4621define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4622; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4623; VLX:       # %bb.0: # %entry
4624; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4625; VLX-NEXT:    kmovd %k0, %eax
4626; VLX-NEXT:    vzeroupper
4627; VLX-NEXT:    retq
4628;
4629; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4630; NoVLX:       # %bb.0: # %entry
4631; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4632; NoVLX-NEXT:    kmovw %k0, %eax
4633; NoVLX-NEXT:    vzeroupper
4634; NoVLX-NEXT:    retq
4635entry:
4636  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat: put the scalar in lane 0, then replicate lane 0 with an all-zero
  ; shuffle mask.
4637  %load = load i64, ptr %__b
4638  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4639  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4640  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are all zero.
4641  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4642  %4 = bitcast <32 x i1> %3 to i32
4643  ret i32 %4
4644}
4645
; Masked qword equality compare of %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with %__u (as
; <8 x i1>), padded with zero lanes to 32 lanes and returned as an i32
; bitmask. Both runs expect a {1to8} broadcast operand with a {%k1} write-mask.
4646define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4647; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4648; VLX:       # %bb.0: # %entry
4649; VLX-NEXT:    kmovd %edi, %k1
4650; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4651; VLX-NEXT:    kmovd %k0, %eax
4652; VLX-NEXT:    vzeroupper
4653; VLX-NEXT:    retq
4654;
4655; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4656; NoVLX:       # %bb.0: # %entry
4657; NoVLX-NEXT:    kmovw %edi, %k1
4658; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4659; NoVLX-NEXT:    kmovw %k0, %eax
4660; NoVLX-NEXT:    vzeroupper
4661; NoVLX-NEXT:    retq
4662entry:
4663  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat: put the scalar in lane 0, then replicate lane 0 with an all-zero
  ; shuffle mask.
4664  %load = load i64, ptr %__b
4665  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4666  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4667  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied 8-bit mask lane by lane.
4668  %3 = bitcast i8 %__u to <8 x i1>
4669  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result lanes 8-31 are all zero.
4670  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4671  %6 = bitcast <32 x i1> %5 to i32
4672  ret i32 %6
4673}
4674
4675
; Unmasked qword equality compare of two <8 x i64> register operands. The
; 8-lane i1 result is padded with zero lanes to 64 lanes and returned as an
; i64 bitmask. The VLX run expects a kmovq into %rax; the NoVLX run expects a
; kmovw into %eax.
4676define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4677; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4678; VLX:       # %bb.0: # %entry
4679; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4680; VLX-NEXT:    kmovq %k0, %rax
4681; VLX-NEXT:    vzeroupper
4682; VLX-NEXT:    retq
4683;
4684; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4685; NoVLX:       # %bb.0: # %entry
4686; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4687; NoVLX-NEXT:    kmovw %k0, %eax
4688; NoVLX-NEXT:    vzeroupper
4689; NoVLX-NEXT:    retq
4690entry:
4691  %0 = bitcast <8 x i64> %__a to <8 x i64>
4692  %1 = bitcast <8 x i64> %__b to <8 x i64>
4693  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are all zero.
4694  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4695  %4 = bitcast <64 x i1> %3 to i64
4696  ret i64 %4
4697}
4698
; Unmasked qword equality compare of %__a against an <8 x i64> loaded from
; %__b. The 8-lane i1 result is padded with zero lanes to 64 lanes and
; returned as an i64 bitmask. Both runs expect the load folded into vpcmpeqq.
4699define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4700; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4701; VLX:       # %bb.0: # %entry
4702; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4703; VLX-NEXT:    kmovq %k0, %rax
4704; VLX-NEXT:    vzeroupper
4705; VLX-NEXT:    retq
4706;
4707; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4708; NoVLX:       # %bb.0: # %entry
4709; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4710; NoVLX-NEXT:    kmovw %k0, %eax
4711; NoVLX-NEXT:    vzeroupper
4712; NoVLX-NEXT:    retq
4713entry:
4714  %0 = bitcast <8 x i64> %__a to <8 x i64>
4715  %load = load <8 x i64>, ptr %__b
4716  %1 = bitcast <8 x i64> %load to <8 x i64>
4717  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are all zero.
4718  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4719  %4 = bitcast <64 x i1> %3 to i64
4720  ret i64 %4
4721}
4722
; Masked qword equality compare of two <8 x i64> register operands. The
; result is ANDed with %__u (as <8 x i1>), padded with zero lanes to 64 lanes
; and returned as an i64 bitmask. Both runs expect the AND to be folded into
; the compare as a {%k1} write-mask.
4723define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4724; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4725; VLX:       # %bb.0: # %entry
4726; VLX-NEXT:    kmovd %edi, %k1
4727; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4728; VLX-NEXT:    kmovq %k0, %rax
4729; VLX-NEXT:    vzeroupper
4730; VLX-NEXT:    retq
4731;
4732; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4733; NoVLX:       # %bb.0: # %entry
4734; NoVLX-NEXT:    kmovw %edi, %k1
4735; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4736; NoVLX-NEXT:    kmovw %k0, %eax
4737; NoVLX-NEXT:    vzeroupper
4738; NoVLX-NEXT:    retq
4739entry:
4740  %0 = bitcast <8 x i64> %__a to <8 x i64>
4741  %1 = bitcast <8 x i64> %__b to <8 x i64>
4742  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied 8-bit mask lane by lane.
4743  %3 = bitcast i8 %__u to <8 x i1>
4744  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result lanes 8-63 are all zero.
4745  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4746  %6 = bitcast <64 x i1> %5 to i64
4747  ret i64 %6
4748}
4749
; Masked qword equality compare of %__a against an <8 x i64> loaded from
; %__b. The result is ANDed with %__u (as <8 x i1>), padded with zero lanes to
; 64 lanes and returned as an i64 bitmask. Both runs expect a folded load and
; a {%k1} write-mask on vpcmpeqq.
4750define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4751; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4752; VLX:       # %bb.0: # %entry
4753; VLX-NEXT:    kmovd %edi, %k1
4754; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4755; VLX-NEXT:    kmovq %k0, %rax
4756; VLX-NEXT:    vzeroupper
4757; VLX-NEXT:    retq
4758;
4759; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4760; NoVLX:       # %bb.0: # %entry
4761; NoVLX-NEXT:    kmovw %edi, %k1
4762; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4763; NoVLX-NEXT:    kmovw %k0, %eax
4764; NoVLX-NEXT:    vzeroupper
4765; NoVLX-NEXT:    retq
4766entry:
4767  %0 = bitcast <8 x i64> %__a to <8 x i64>
4768  %load = load <8 x i64>, ptr %__b
4769  %1 = bitcast <8 x i64> %load to <8 x i64>
4770  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied 8-bit mask lane by lane.
4771  %3 = bitcast i8 %__u to <8 x i1>
4772  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result lanes 8-63 are all zero.
4773  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4774  %6 = bitcast <64 x i1> %5 to i64
4775  ret i64 %6
4776}
4777
4778
; Unmasked qword equality compare of %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes. The result is padded with zero lanes to
; 64 lanes and returned as an i64 bitmask. Both runs expect the splat to
; become a {1to8} broadcast memory operand.
4779define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4780; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4781; VLX:       # %bb.0: # %entry
4782; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4783; VLX-NEXT:    kmovq %k0, %rax
4784; VLX-NEXT:    vzeroupper
4785; VLX-NEXT:    retq
4786;
4787; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4788; NoVLX:       # %bb.0: # %entry
4789; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4790; NoVLX-NEXT:    kmovw %k0, %eax
4791; NoVLX-NEXT:    vzeroupper
4792; NoVLX-NEXT:    retq
4793entry:
4794  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat: put the scalar in lane 0, then replicate lane 0 with an all-zero
  ; shuffle mask.
4795  %load = load i64, ptr %__b
4796  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4797  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4798  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are all zero.
4799  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4800  %4 = bitcast <64 x i1> %3 to i64
4801  ret i64 %4
4802}
4803
; Masked qword equality compare of %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with %__u (as
; <8 x i1>), padded with zero lanes to 64 lanes and returned as an i64
; bitmask. Both runs expect a {1to8} broadcast operand with a {%k1} write-mask.
4804define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4805; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4806; VLX:       # %bb.0: # %entry
4807; VLX-NEXT:    kmovd %edi, %k1
4808; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4809; VLX-NEXT:    kmovq %k0, %rax
4810; VLX-NEXT:    vzeroupper
4811; VLX-NEXT:    retq
4812;
4813; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4814; NoVLX:       # %bb.0: # %entry
4815; NoVLX-NEXT:    kmovw %edi, %k1
4816; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4817; NoVLX-NEXT:    kmovw %k0, %eax
4818; NoVLX-NEXT:    vzeroupper
4819; NoVLX-NEXT:    retq
4820entry:
4821  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat: put the scalar in lane 0, then replicate lane 0 with an all-zero
  ; shuffle mask.
4822  %load = load i64, ptr %__b
4823  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4824  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4825  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied 8-bit mask lane by lane.
4826  %3 = bitcast i8 %__u to <8 x i1>
4827  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result lanes 8-63 are all zero.
4828  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4829  %6 = bitcast <64 x i1> %5 to i64
4830  ret i64 %6
4831}
4832
4833
; Unmasked signed greater-than compare of two 128-bit operands viewed as
; <16 x i8>. The 16-lane i1 result is padded with zero lanes to 32 lanes and
; returned as an i32 bitmask. The VLX run expects a direct compare into %k0;
; the NoVLX run expects the vector compare result to be sign-extended to
; dwords (vpmovsxbd) and turned into a mask with vptestmd.
4834define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4835; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4836; VLX:       # %bb.0: # %entry
4837; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
4838; VLX-NEXT:    kmovd %k0, %eax
4839; VLX-NEXT:    retq
4840;
4841; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4842; NoVLX:       # %bb.0: # %entry
4843; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
4844; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4845; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4846; NoVLX-NEXT:    kmovw %k0, %eax
4847; NoVLX-NEXT:    vzeroupper
4848; NoVLX-NEXT:    retq
4849entry:
4850  %0 = bitcast <2 x i64> %__a to <16 x i8>
4851  %1 = bitcast <2 x i64> %__b to <16 x i8>
4852  %2 = icmp sgt <16 x i8> %0, %1
  ; Indices 0-15 select the compare lanes; indices 16-31 select lanes of the
  ; zeroinitializer operand, so result lanes 16-31 are all zero.
4853  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4854  %4 = bitcast <32 x i1> %3 to i32
4855  ret i32 %4
4856}
4857
; Unmasked signed greater-than byte compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The 16-lane i1 result is padded
; with zero lanes to 32 lanes and returned as an i32 bitmask. Both runs expect
; the load folded into vpcmpgtb.
4858define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
4859; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4860; VLX:       # %bb.0: # %entry
4861; VLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k0
4862; VLX-NEXT:    kmovd %k0, %eax
4863; VLX-NEXT:    retq
4864;
4865; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4866; NoVLX:       # %bb.0: # %entry
4867; NoVLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0
4868; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4869; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4870; NoVLX-NEXT:    kmovw %k0, %eax
4871; NoVLX-NEXT:    vzeroupper
4872; NoVLX-NEXT:    retq
4873entry:
4874  %0 = bitcast <2 x i64> %__a to <16 x i8>
4875  %load = load <2 x i64>, ptr %__b
4876  %1 = bitcast <2 x i64> %load to <16 x i8>
4877  %2 = icmp sgt <16 x i8> %0, %1
  ; Indices 0-15 select the compare lanes; indices 16-31 select lanes of the
  ; zeroinitializer operand, so result lanes 16-31 are all zero.
4878  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4879  %4 = bitcast <32 x i1> %3 to i32
4880  ret i32 %4
4881}
4882
; Masked signed greater-than compare of two 128-bit operands viewed as
; <16 x i8>. The result is ANDed with %__u (as <16 x i1>), padded with zero
; lanes to 32 lanes and returned as an i32 bitmask. The VLX run expects a
; {%k1} write-mask on the compare; the NoVLX run expects the mask to be
; applied afterwards with a scalar andl.
4883define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4884; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4885; VLX:       # %bb.0: # %entry
4886; VLX-NEXT:    kmovd %edi, %k1
4887; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4888; VLX-NEXT:    kmovd %k0, %eax
4889; VLX-NEXT:    retq
4890;
4891; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4892; NoVLX:       # %bb.0: # %entry
4893; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
4894; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4895; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4896; NoVLX-NEXT:    kmovw %k0, %eax
4897; NoVLX-NEXT:    andl %edi, %eax
4898; NoVLX-NEXT:    vzeroupper
4899; NoVLX-NEXT:    retq
4900entry:
4901  %0 = bitcast <2 x i64> %__a to <16 x i8>
4902  %1 = bitcast <2 x i64> %__b to <16 x i8>
4903  %2 = icmp sgt <16 x i8> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
4904  %3 = bitcast i16 %__u to <16 x i1>
4905  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select the masked compare lanes; indices 16-31 select lanes
  ; of the zeroinitializer operand, so result lanes 16-31 are all zero.
4906  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4907  %6 = bitcast <32 x i1> %5 to i32
4908  ret i32 %6
4909}
4910
; Masked signed greater-than byte compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The result is ANDed with %__u
; (as <16 x i1>), padded with zero lanes to 32 lanes and returned as an i32
; bitmask. The VLX run expects a {%k1} write-mask on the compare; the NoVLX
; run expects the mask to be applied afterwards with a scalar andl.
4911define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
4912; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4913; VLX:       # %bb.0: # %entry
4914; VLX-NEXT:    kmovd %edi, %k1
4915; VLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4916; VLX-NEXT:    kmovd %k0, %eax
4917; VLX-NEXT:    retq
4918;
4919; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4920; NoVLX:       # %bb.0: # %entry
4921; NoVLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
4922; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4923; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4924; NoVLX-NEXT:    kmovw %k0, %eax
4925; NoVLX-NEXT:    andl %edi, %eax
4926; NoVLX-NEXT:    vzeroupper
4927; NoVLX-NEXT:    retq
4928entry:
4929  %0 = bitcast <2 x i64> %__a to <16 x i8>
4930  %load = load <2 x i64>, ptr %__b
4931  %1 = bitcast <2 x i64> %load to <16 x i8>
4932  %2 = icmp sgt <16 x i8> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
4933  %3 = bitcast i16 %__u to <16 x i1>
4934  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select the masked compare lanes; indices 16-31 select lanes
  ; of the zeroinitializer operand, so result lanes 16-31 are all zero.
4935  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4936  %6 = bitcast <32 x i1> %5 to i32
4937  ret i32 %6
4938}
4939
4940
; Unmasked signed greater-than compare of two 128-bit operands viewed as
; <16 x i8>. The 16-lane i1 result is padded with zero lanes to 64 lanes and
; returned as an i64 bitmask. The VLX run expects a direct compare into %k0
; read back with kmovq; the NoVLX run expects vpmovsxbd + vptestmd to form
; the mask.
4941define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4942; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4943; VLX:       # %bb.0: # %entry
4944; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
4945; VLX-NEXT:    kmovq %k0, %rax
4946; VLX-NEXT:    retq
4947;
4948; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4949; NoVLX:       # %bb.0: # %entry
4950; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
4951; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4952; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4953; NoVLX-NEXT:    kmovw %k0, %eax
4954; NoVLX-NEXT:    vzeroupper
4955; NoVLX-NEXT:    retq
4956entry:
4957  %0 = bitcast <2 x i64> %__a to <16 x i8>
4958  %1 = bitcast <2 x i64> %__b to <16 x i8>
4959  %2 = icmp sgt <16 x i8> %0, %1
  ; Indices 0-15 select the compare lanes; indices 16-31 (repeated) select
  ; lanes of the zeroinitializer operand, so result lanes 16-63 are all zero.
4960  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4961  %4 = bitcast <64 x i1> %3 to i64
4962  ret i64 %4
4963}
4964
; Unmasked signed greater-than byte compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The 16-lane i1 result is padded
; with zero lanes to 64 lanes and returned as an i64 bitmask. Both runs expect
; the load folded into vpcmpgtb.
4965define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
4966; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4967; VLX:       # %bb.0: # %entry
4968; VLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k0
4969; VLX-NEXT:    kmovq %k0, %rax
4970; VLX-NEXT:    retq
4971;
4972; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4973; NoVLX:       # %bb.0: # %entry
4974; NoVLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0
4975; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4976; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4977; NoVLX-NEXT:    kmovw %k0, %eax
4978; NoVLX-NEXT:    vzeroupper
4979; NoVLX-NEXT:    retq
4980entry:
4981  %0 = bitcast <2 x i64> %__a to <16 x i8>
4982  %load = load <2 x i64>, ptr %__b
4983  %1 = bitcast <2 x i64> %load to <16 x i8>
4984  %2 = icmp sgt <16 x i8> %0, %1
  ; Indices 0-15 select the compare lanes; indices 16-31 (repeated) select
  ; lanes of the zeroinitializer operand, so result lanes 16-63 are all zero.
4985  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4986  %4 = bitcast <64 x i1> %3 to i64
4987  ret i64 %4
4988}
4989
; Masked signed greater-than compare of two 128-bit operands viewed as
; <16 x i8>. The result is ANDed with %__u (as <16 x i1>), padded with zero
; lanes to 64 lanes and returned as an i64 bitmask. The VLX run expects a
; {%k1} write-mask on the compare; the NoVLX run expects the mask to be
; applied afterwards with a scalar andl.
4990define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4991; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4992; VLX:       # %bb.0: # %entry
4993; VLX-NEXT:    kmovd %edi, %k1
4994; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4995; VLX-NEXT:    kmovq %k0, %rax
4996; VLX-NEXT:    retq
4997;
4998; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4999; NoVLX:       # %bb.0: # %entry
5000; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
5001; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5002; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5003; NoVLX-NEXT:    kmovw %k0, %eax
5004; NoVLX-NEXT:    andl %edi, %eax
5005; NoVLX-NEXT:    vzeroupper
5006; NoVLX-NEXT:    retq
5007entry:
5008  %0 = bitcast <2 x i64> %__a to <16 x i8>
5009  %1 = bitcast <2 x i64> %__b to <16 x i8>
5010  %2 = icmp sgt <16 x i8> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
5011  %3 = bitcast i16 %__u to <16 x i1>
5012  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select the masked compare lanes; indices 16-31 (repeated)
  ; select lanes of the zeroinitializer operand, so result lanes 16-63 are
  ; all zero.
5013  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5014  %6 = bitcast <64 x i1> %5 to i64
5015  ret i64 %6
5016}
5017
; Masked signed greater-than byte compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The result is ANDed with %__u
; (as <16 x i1>), padded with zero lanes to 64 lanes and returned as an i64
; bitmask. The VLX run expects a {%k1} write-mask on the compare; the NoVLX
; run expects the mask to be applied afterwards with a scalar andl.
5018define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5019; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5020; VLX:       # %bb.0: # %entry
5021; VLX-NEXT:    kmovd %edi, %k1
5022; VLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
5023; VLX-NEXT:    kmovq %k0, %rax
5024; VLX-NEXT:    retq
5025;
5026; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5027; NoVLX:       # %bb.0: # %entry
5028; NoVLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
5029; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5030; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5031; NoVLX-NEXT:    kmovw %k0, %eax
5032; NoVLX-NEXT:    andl %edi, %eax
5033; NoVLX-NEXT:    vzeroupper
5034; NoVLX-NEXT:    retq
5035entry:
5036  %0 = bitcast <2 x i64> %__a to <16 x i8>
5037  %load = load <2 x i64>, ptr %__b
5038  %1 = bitcast <2 x i64> %load to <16 x i8>
5039  %2 = icmp sgt <16 x i8> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
5040  %3 = bitcast i16 %__u to <16 x i1>
5041  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select the masked compare lanes; indices 16-31 (repeated)
  ; select lanes of the zeroinitializer operand, so result lanes 16-63 are
  ; all zero.
5042  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5043  %6 = bitcast <64 x i1> %5 to i64
5044  ret i64 %6
5045}
5046
5047
; Unmasked signed greater-than compare of two 256-bit operands viewed as
; <32 x i8>. The 32-lane i1 result is padded with zero lanes to 64 lanes and
; returned as an i64 bitmask. The VLX run expects a single compare into %k0.
; The NoVLX run expects each 128-bit half of the vector compare result to be
; converted to a 16-bit mask separately and the two combined with shll/orl.
5048define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5049; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5050; VLX:       # %bb.0: # %entry
5051; VLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
5052; VLX-NEXT:    kmovq %k0, %rax
5053; VLX-NEXT:    vzeroupper
5054; VLX-NEXT:    retq
5055;
5056; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5057; NoVLX:       # %bb.0: # %entry
5058; NoVLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
5059; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5060; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5061; NoVLX-NEXT:    kmovw %k0, %ecx
5062; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5063; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5064; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5065; NoVLX-NEXT:    kmovw %k0, %eax
5066; NoVLX-NEXT:    shll $16, %eax
5067; NoVLX-NEXT:    orl %ecx, %eax
5068; NoVLX-NEXT:    vzeroupper
5069; NoVLX-NEXT:    retq
5070entry:
5071  %0 = bitcast <4 x i64> %__a to <32 x i8>
5072  %1 = bitcast <4 x i64> %__b to <32 x i8>
5073  %2 = icmp sgt <32 x i8> %0, %1
  ; Indices 0-31 select the compare lanes; indices 32-63 select lanes of the
  ; zeroinitializer operand, so result lanes 32-63 are all zero.
5074  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5075  %4 = bitcast <64 x i1> %3 to i64
5076  ret i64 %4
5077}
5078
; Unmasked signed greater-than byte compare of %__a against a 256-bit value
; loaded from %__b, both viewed as <32 x i8>. The 32-lane i1 result is padded
; with zero lanes to 64 lanes and returned as an i64 bitmask. Both runs expect
; the load folded into vpcmpgtb. The NoVLX run expects the two 128-bit halves
; of the result to be converted to masks separately and combined with
; shll/orl.
5079define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5080; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5081; VLX:       # %bb.0: # %entry
5082; VLX-NEXT:    vpcmpgtb (%rdi), %ymm0, %k0
5083; VLX-NEXT:    kmovq %k0, %rax
5084; VLX-NEXT:    vzeroupper
5085; VLX-NEXT:    retq
5086;
5087; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5088; NoVLX:       # %bb.0: # %entry
5089; NoVLX-NEXT:    vpcmpgtb (%rdi), %ymm0, %ymm0
5090; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5091; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5092; NoVLX-NEXT:    kmovw %k0, %ecx
5093; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5094; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5095; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5096; NoVLX-NEXT:    kmovw %k0, %eax
5097; NoVLX-NEXT:    shll $16, %eax
5098; NoVLX-NEXT:    orl %ecx, %eax
5099; NoVLX-NEXT:    vzeroupper
5100; NoVLX-NEXT:    retq
5101entry:
5102  %0 = bitcast <4 x i64> %__a to <32 x i8>
5103  %load = load <4 x i64>, ptr %__b
5104  %1 = bitcast <4 x i64> %load to <32 x i8>
5105  %2 = icmp sgt <32 x i8> %0, %1
  ; Indices 0-31 select the compare lanes; indices 32-63 select lanes of the
  ; zeroinitializer operand, so result lanes 32-63 are all zero.
5106  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5107  %4 = bitcast <64 x i1> %3 to i64
5108  ret i64 %4
5109}
5110
; Masked signed greater-than compare of two 256-bit operands viewed as
; <32 x i8>. The result is ANDed with %__u (as <32 x i1>), padded with zero
; lanes to 64 lanes and returned as an i64 bitmask. The VLX run expects a
; {%k1} write-mask on the compare. The NoVLX run expects each 16-bit half of
; the mask to be formed and ANDed with the matching half of %edi separately,
; then combined with shll/movzwl/orl.
5111define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5112; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5113; VLX:       # %bb.0: # %entry
5114; VLX-NEXT:    kmovd %edi, %k1
5115; VLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5116; VLX-NEXT:    kmovq %k0, %rax
5117; VLX-NEXT:    vzeroupper
5118; VLX-NEXT:    retq
5119;
5120; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5121; NoVLX:       # %bb.0: # %entry
5122; NoVLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
5123; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5124; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5125; NoVLX-NEXT:    kmovw %k0, %eax
5126; NoVLX-NEXT:    andl %edi, %eax
5127; NoVLX-NEXT:    shrl $16, %edi
5128; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5129; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5130; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5131; NoVLX-NEXT:    kmovw %k0, %ecx
5132; NoVLX-NEXT:    andl %edi, %ecx
5133; NoVLX-NEXT:    shll $16, %ecx
5134; NoVLX-NEXT:    movzwl %ax, %eax
5135; NoVLX-NEXT:    orl %ecx, %eax
5136; NoVLX-NEXT:    vzeroupper
5137; NoVLX-NEXT:    retq
5138entry:
5139  %0 = bitcast <4 x i64> %__a to <32 x i8>
5140  %1 = bitcast <4 x i64> %__b to <32 x i8>
5141  %2 = icmp sgt <32 x i8> %0, %1
  ; Apply the caller-supplied 32-bit mask lane by lane.
5142  %3 = bitcast i32 %__u to <32 x i1>
5143  %4 = and <32 x i1> %2, %3
  ; Indices 0-31 select the masked compare lanes; indices 32-63 select lanes
  ; of the zeroinitializer operand, so result lanes 32-63 are all zero.
5144  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5145  %6 = bitcast <64 x i1> %5 to i64
5146  ret i64 %6
5147}
5148
; Masked signed greater-than byte compare of %__a against a 256-bit value
; loaded from %__b, both viewed as <32 x i8>. The result is ANDed with %__u
; (as <32 x i1>), padded with zero lanes to 64 lanes and returned as an i64
; bitmask. The VLX run expects a folded load with a {%k1} write-mask. The
; NoVLX run expects each 16-bit half of the mask to be formed and ANDed with
; the matching half of %edi separately, then combined with shll/movzwl/orl.
5149define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5150; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5151; VLX:       # %bb.0: # %entry
5152; VLX-NEXT:    kmovd %edi, %k1
5153; VLX-NEXT:    vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5154; VLX-NEXT:    kmovq %k0, %rax
5155; VLX-NEXT:    vzeroupper
5156; VLX-NEXT:    retq
5157;
5158; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5159; NoVLX:       # %bb.0: # %entry
5160; NoVLX-NEXT:    vpcmpgtb (%rsi), %ymm0, %ymm0
5161; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5162; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5163; NoVLX-NEXT:    kmovw %k0, %eax
5164; NoVLX-NEXT:    andl %edi, %eax
5165; NoVLX-NEXT:    shrl $16, %edi
5166; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5167; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5168; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5169; NoVLX-NEXT:    kmovw %k0, %ecx
5170; NoVLX-NEXT:    andl %edi, %ecx
5171; NoVLX-NEXT:    shll $16, %ecx
5172; NoVLX-NEXT:    movzwl %ax, %eax
5173; NoVLX-NEXT:    orl %ecx, %eax
5174; NoVLX-NEXT:    vzeroupper
5175; NoVLX-NEXT:    retq
5176entry:
5177  %0 = bitcast <4 x i64> %__a to <32 x i8>
5178  %load = load <4 x i64>, ptr %__b
5179  %1 = bitcast <4 x i64> %load to <32 x i8>
5180  %2 = icmp sgt <32 x i8> %0, %1
  ; Apply the caller-supplied 32-bit mask lane by lane.
5181  %3 = bitcast i32 %__u to <32 x i1>
5182  %4 = and <32 x i1> %2, %3
  ; Indices 0-31 select the masked compare lanes; indices 32-63 select lanes
  ; of the zeroinitializer operand, so result lanes 32-63 are all zero.
5183  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5184  %6 = bitcast <64 x i1> %5 to i64
5185  ret i64 %6
5186}
5187
5188
; Signed greater-than compare of the 8 x i16 lanes of %__a and %__b, with the
; 8-bit result widened to 16 mask bits and returned as an i16.
; The +avx512bw/+avx512vl run expects vpcmpgtw writing a mask register directly.
; The +avx512f-only run expects an xmm compare followed by vpmovsxwq/vptestmq.
5189define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5190; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5191; VLX:       # %bb.0: # %entry
5192; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5193; VLX-NEXT:    kmovd %k0, %eax
5194; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5195; VLX-NEXT:    retq
5196;
5197; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5198; NoVLX:       # %bb.0: # %entry
5199; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5200; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5201; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5202; NoVLX-NEXT:    kmovw %k0, %eax
5203; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5204; NoVLX-NEXT:    vzeroupper
5205; NoVLX-NEXT:    retq
5206entry:
5207  %0 = bitcast <2 x i64> %__a to <8 x i16>
5208  %1 = bitcast <2 x i64> %__b to <8 x i16>
5209  %2 = icmp sgt <8 x i16> %0, %1
  ; Indices 0-7 keep the compare lanes. Indices 8-15 select lanes of the
  ; zeroinitializer operand, so the upper 8 bits of the result are zero.
5210  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5211  %4 = bitcast <16 x i1> %3 to i16
5212  ret i16 %4
5213}
5214
; Signed greater-than compare of the 8 x i16 lanes of %__a against a <2 x i64>
; loaded from %__b, widened to 16 mask bits and returned as an i16.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5215define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5216; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5217; VLX:       # %bb.0: # %entry
5218; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5219; VLX-NEXT:    kmovd %k0, %eax
5220; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5221; VLX-NEXT:    retq
5222;
5223; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5224; NoVLX:       # %bb.0: # %entry
5225; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5226; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5227; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5228; NoVLX-NEXT:    kmovw %k0, %eax
5229; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5230; NoVLX-NEXT:    vzeroupper
5231; NoVLX-NEXT:    retq
5232entry:
5233  %0 = bitcast <2 x i64> %__a to <8 x i16>
5234  %load = load <2 x i64>, ptr %__b
5235  %1 = bitcast <2 x i64> %load to <8 x i16>
5236  %2 = icmp sgt <8 x i16> %0, %1
  ; Indices 0-7 keep the compare lanes. Indices 8-15 select lanes of the
  ; zeroinitializer operand, so the upper 8 bits of the result are zero.
5237  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5238  %4 = bitcast <16 x i1> %3 to i16
5239  ret i16 %4
5240}
5241
; Masked signed greater-than compare of the 8 x i16 lanes of %__a and %__b. The
; compare result is ANDed with the 8 bits of %__u, widened to 16 mask bits and
; returned as an i16.
; The +avx512bw/+avx512vl run expects %__u moved into %k1 and used as the write
; mask of vpcmpgtw. The +avx512f-only run expects %k1 applied on vptestmq instead.
5242define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5243; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5244; VLX:       # %bb.0: # %entry
5245; VLX-NEXT:    kmovd %edi, %k1
5246; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5247; VLX-NEXT:    kmovd %k0, %eax
5248; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5249; VLX-NEXT:    retq
5250;
5251; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5252; NoVLX:       # %bb.0: # %entry
5253; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5254; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5255; NoVLX-NEXT:    kmovw %edi, %k1
5256; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5257; NoVLX-NEXT:    kmovw %k0, %eax
5258; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5259; NoVLX-NEXT:    vzeroupper
5260; NoVLX-NEXT:    retq
5261entry:
5262  %0 = bitcast <2 x i64> %__a to <8 x i16>
5263  %1 = bitcast <2 x i64> %__b to <8 x i16>
5264  %2 = icmp sgt <8 x i16> %0, %1
5265  %3 = bitcast i8 %__u to <8 x i1>
5266  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 keep the masked compare lanes. Indices 8-15 select lanes of the
  ; zeroinitializer operand, so the upper 8 bits of the result are zero.
5267  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5268  %6 = bitcast <16 x i1> %5 to i16
5269  ret i16 %6
5270}
5271
; Masked signed greater-than compare of the 8 x i16 lanes of %__a against a
; <2 x i64> loaded from %__b. The compare result is ANDed with the 8 bits of
; %__u, widened to 16 mask bits and returned as an i16.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand and %__u
; moved into %k1.
5272define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5273; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5274; VLX:       # %bb.0: # %entry
5275; VLX-NEXT:    kmovd %edi, %k1
5276; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5277; VLX-NEXT:    kmovd %k0, %eax
5278; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5279; VLX-NEXT:    retq
5280;
5281; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5282; NoVLX:       # %bb.0: # %entry
5283; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5284; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5285; NoVLX-NEXT:    kmovw %edi, %k1
5286; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5287; NoVLX-NEXT:    kmovw %k0, %eax
5288; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5289; NoVLX-NEXT:    vzeroupper
5290; NoVLX-NEXT:    retq
5291entry:
5292  %0 = bitcast <2 x i64> %__a to <8 x i16>
5293  %load = load <2 x i64>, ptr %__b
5294  %1 = bitcast <2 x i64> %load to <8 x i16>
5295  %2 = icmp sgt <8 x i16> %0, %1
5296  %3 = bitcast i8 %__u to <8 x i1>
5297  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 keep the masked compare lanes. Indices 8-15 select lanes of the
  ; zeroinitializer operand, so the upper 8 bits of the result are zero.
5298  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5299  %6 = bitcast <16 x i1> %5 to i16
5300  ret i16 %6
5301}
5302
5303
; Signed greater-than compare of the 8 x i16 lanes of %__a and %__b, with the
; 8-bit result widened to 32 mask bits and returned as an i32.
; The +avx512bw/+avx512vl run expects vpcmpgtw writing a mask register directly.
; The +avx512f-only run expects an xmm compare followed by vpmovsxwq/vptestmq.
5304define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5305; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5306; VLX:       # %bb.0: # %entry
5307; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5308; VLX-NEXT:    kmovd %k0, %eax
5309; VLX-NEXT:    retq
5310;
5311; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5312; NoVLX:       # %bb.0: # %entry
5313; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5314; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5315; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5316; NoVLX-NEXT:    kmovw %k0, %eax
5317; NoVLX-NEXT:    vzeroupper
5318; NoVLX-NEXT:    retq
5319entry:
5320  %0 = bitcast <2 x i64> %__a to <8 x i16>
5321  %1 = bitcast <2 x i64> %__b to <8 x i16>
5322  %2 = icmp sgt <8 x i16> %0, %1
  ; Only indices 0-7 read the compare result. Every index >= 8, including the
  ; repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5323  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5324  %4 = bitcast <32 x i1> %3 to i32
5325  ret i32 %4
5326}
5327
; Signed greater-than compare of the 8 x i16 lanes of %__a against a <2 x i64>
; loaded from %__b, widened to 32 mask bits and returned as an i32.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5328define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5329; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5330; VLX:       # %bb.0: # %entry
5331; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5332; VLX-NEXT:    kmovd %k0, %eax
5333; VLX-NEXT:    retq
5334;
5335; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5336; NoVLX:       # %bb.0: # %entry
5337; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5338; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5339; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5340; NoVLX-NEXT:    kmovw %k0, %eax
5341; NoVLX-NEXT:    vzeroupper
5342; NoVLX-NEXT:    retq
5343entry:
5344  %0 = bitcast <2 x i64> %__a to <8 x i16>
5345  %load = load <2 x i64>, ptr %__b
5346  %1 = bitcast <2 x i64> %load to <8 x i16>
5347  %2 = icmp sgt <8 x i16> %0, %1
  ; Only indices 0-7 read the compare result. Every index >= 8, including the
  ; repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5348  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5349  %4 = bitcast <32 x i1> %3 to i32
5350  ret i32 %4
5351}
5352
; Masked signed greater-than compare of the 8 x i16 lanes of %__a and %__b. The
; compare result is ANDed with the 8 bits of %__u, widened to 32 mask bits and
; returned as an i32.
; The +avx512bw/+avx512vl run expects %__u moved into %k1 and used as the write
; mask of vpcmpgtw. The +avx512f-only run expects %k1 applied on vptestmq instead.
5353define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5354; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5355; VLX:       # %bb.0: # %entry
5356; VLX-NEXT:    kmovd %edi, %k1
5357; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5358; VLX-NEXT:    kmovd %k0, %eax
5359; VLX-NEXT:    retq
5360;
5361; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5362; NoVLX:       # %bb.0: # %entry
5363; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5364; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5365; NoVLX-NEXT:    kmovw %edi, %k1
5366; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5367; NoVLX-NEXT:    kmovw %k0, %eax
5368; NoVLX-NEXT:    vzeroupper
5369; NoVLX-NEXT:    retq
5370entry:
5371  %0 = bitcast <2 x i64> %__a to <8 x i16>
5372  %1 = bitcast <2 x i64> %__b to <8 x i16>
5373  %2 = icmp sgt <8 x i16> %0, %1
5374  %3 = bitcast i8 %__u to <8 x i1>
5375  %4 = and <8 x i1> %2, %3
  ; Only indices 0-7 read the masked compare result. Every index >= 8, including
  ; the repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5376  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5377  %6 = bitcast <32 x i1> %5 to i32
5378  ret i32 %6
5379}
5380
; Masked signed greater-than compare of the 8 x i16 lanes of %__a against a
; <2 x i64> loaded from %__b. The compare result is ANDed with the 8 bits of
; %__u, widened to 32 mask bits and returned as an i32.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand and %__u
; moved into %k1.
5381define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5382; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5383; VLX:       # %bb.0: # %entry
5384; VLX-NEXT:    kmovd %edi, %k1
5385; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5386; VLX-NEXT:    kmovd %k0, %eax
5387; VLX-NEXT:    retq
5388;
5389; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5390; NoVLX:       # %bb.0: # %entry
5391; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5392; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5393; NoVLX-NEXT:    kmovw %edi, %k1
5394; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5395; NoVLX-NEXT:    kmovw %k0, %eax
5396; NoVLX-NEXT:    vzeroupper
5397; NoVLX-NEXT:    retq
5398entry:
5399  %0 = bitcast <2 x i64> %__a to <8 x i16>
5400  %load = load <2 x i64>, ptr %__b
5401  %1 = bitcast <2 x i64> %load to <8 x i16>
5402  %2 = icmp sgt <8 x i16> %0, %1
5403  %3 = bitcast i8 %__u to <8 x i1>
5404  %4 = and <8 x i1> %2, %3
  ; Only indices 0-7 read the masked compare result. Every index >= 8, including
  ; the repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5405  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5406  %6 = bitcast <32 x i1> %5 to i32
5407  ret i32 %6
5408}
5409
5410
; Signed greater-than compare of the 8 x i16 lanes of %__a and %__b, with the
; 8-bit result widened to 64 mask bits and returned as an i64.
; The +avx512bw/+avx512vl run expects vpcmpgtw writing a mask register that is
; read back with kmovq. The +avx512f-only run expects an xmm compare followed by
; vpmovsxwq/vptestmq and a kmovw into %eax.
5411define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5412; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5413; VLX:       # %bb.0: # %entry
5414; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5415; VLX-NEXT:    kmovq %k0, %rax
5416; VLX-NEXT:    retq
5417;
5418; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5419; NoVLX:       # %bb.0: # %entry
5420; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5421; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5422; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5423; NoVLX-NEXT:    kmovw %k0, %eax
5424; NoVLX-NEXT:    vzeroupper
5425; NoVLX-NEXT:    retq
5426entry:
5427  %0 = bitcast <2 x i64> %__a to <8 x i16>
5428  %1 = bitcast <2 x i64> %__b to <8 x i16>
5429  %2 = icmp sgt <8 x i16> %0, %1
  ; Only indices 0-7 read the compare result. Every index >= 8, including the
  ; repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5430  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5431  %4 = bitcast <64 x i1> %3 to i64
5432  ret i64 %4
5433}
5434
; Signed greater-than compare of the 8 x i16 lanes of %__a against a <2 x i64>
; loaded from %__b, widened to 64 mask bits and returned as an i64.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5435define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5436; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5437; VLX:       # %bb.0: # %entry
5438; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5439; VLX-NEXT:    kmovq %k0, %rax
5440; VLX-NEXT:    retq
5441;
5442; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5443; NoVLX:       # %bb.0: # %entry
5444; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5445; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5446; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5447; NoVLX-NEXT:    kmovw %k0, %eax
5448; NoVLX-NEXT:    vzeroupper
5449; NoVLX-NEXT:    retq
5450entry:
5451  %0 = bitcast <2 x i64> %__a to <8 x i16>
5452  %load = load <2 x i64>, ptr %__b
5453  %1 = bitcast <2 x i64> %load to <8 x i16>
5454  %2 = icmp sgt <8 x i16> %0, %1
  ; Only indices 0-7 read the compare result. Every index >= 8, including the
  ; repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5455  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5456  %4 = bitcast <64 x i1> %3 to i64
5457  ret i64 %4
5458}
5459
; Masked signed greater-than compare of the 8 x i16 lanes of %__a and %__b. The
; compare result is ANDed with the 8 bits of %__u, widened to 64 mask bits and
; returned as an i64.
; The +avx512bw/+avx512vl run expects %__u moved into %k1 and used as the write
; mask of vpcmpgtw. The +avx512f-only run expects %k1 applied on vptestmq instead.
5460define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5461; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5462; VLX:       # %bb.0: # %entry
5463; VLX-NEXT:    kmovd %edi, %k1
5464; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5465; VLX-NEXT:    kmovq %k0, %rax
5466; VLX-NEXT:    retq
5467;
5468; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5469; NoVLX:       # %bb.0: # %entry
5470; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5471; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5472; NoVLX-NEXT:    kmovw %edi, %k1
5473; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5474; NoVLX-NEXT:    kmovw %k0, %eax
5475; NoVLX-NEXT:    vzeroupper
5476; NoVLX-NEXT:    retq
5477entry:
5478  %0 = bitcast <2 x i64> %__a to <8 x i16>
5479  %1 = bitcast <2 x i64> %__b to <8 x i16>
5480  %2 = icmp sgt <8 x i16> %0, %1
5481  %3 = bitcast i8 %__u to <8 x i1>
5482  %4 = and <8 x i1> %2, %3
  ; Only indices 0-7 read the masked compare result. Every index >= 8, including
  ; the repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5483  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5484  %6 = bitcast <64 x i1> %5 to i64
5485  ret i64 %6
5486}
5487
; Masked signed greater-than compare of the 8 x i16 lanes of %__a against a
; <2 x i64> loaded from %__b. The compare result is ANDed with the 8 bits of
; %__u, widened to 64 mask bits and returned as an i64.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand and %__u
; moved into %k1.
5488define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5489; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5490; VLX:       # %bb.0: # %entry
5491; VLX-NEXT:    kmovd %edi, %k1
5492; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5493; VLX-NEXT:    kmovq %k0, %rax
5494; VLX-NEXT:    retq
5495;
5496; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5497; NoVLX:       # %bb.0: # %entry
5498; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5499; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5500; NoVLX-NEXT:    kmovw %edi, %k1
5501; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5502; NoVLX-NEXT:    kmovw %k0, %eax
5503; NoVLX-NEXT:    vzeroupper
5504; NoVLX-NEXT:    retq
5505entry:
5506  %0 = bitcast <2 x i64> %__a to <8 x i16>
5507  %load = load <2 x i64>, ptr %__b
5508  %1 = bitcast <2 x i64> %load to <8 x i16>
5509  %2 = icmp sgt <8 x i16> %0, %1
5510  %3 = bitcast i8 %__u to <8 x i1>
5511  %4 = and <8 x i1> %2, %3
  ; Only indices 0-7 read the masked compare result. Every index >= 8, including
  ; the repeated 8-15 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 8 and up are zero.
5512  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5513  %6 = bitcast <64 x i1> %5 to i64
5514  ret i64 %6
5515}
5516
5517
; Signed greater-than compare of the 16 x i16 lanes of %__a and %__b, with the
; 16-bit result widened to 32 mask bits and returned as an i32.
; The +avx512bw/+avx512vl run expects a ymm vpcmpgtw writing a mask register.
; The +avx512f-only run expects a ymm compare followed by vpmovsxwd/vptestmd.
5518define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5519; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5520; VLX:       # %bb.0: # %entry
5521; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
5522; VLX-NEXT:    kmovd %k0, %eax
5523; VLX-NEXT:    vzeroupper
5524; VLX-NEXT:    retq
5525;
5526; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5527; NoVLX:       # %bb.0: # %entry
5528; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5529; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5530; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5531; NoVLX-NEXT:    kmovw %k0, %eax
5532; NoVLX-NEXT:    vzeroupper
5533; NoVLX-NEXT:    retq
5534entry:
5535  %0 = bitcast <4 x i64> %__a to <16 x i16>
5536  %1 = bitcast <4 x i64> %__b to <16 x i16>
5537  %2 = icmp sgt <16 x i16> %0, %1
  ; Indices 0-15 keep the compare lanes. Indices 16-31 select lanes of the
  ; zeroinitializer operand, so the upper 16 bits of the result are zero.
5538  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5539  %4 = bitcast <32 x i1> %3 to i32
5540  ret i32 %4
5541}
5542
; Signed greater-than compare of the 16 x i16 lanes of %__a against a <4 x i64>
; loaded from %__b, widened to 32 mask bits and returned as an i32.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5543define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5544; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5545; VLX:       # %bb.0: # %entry
5546; VLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %k0
5547; VLX-NEXT:    kmovd %k0, %eax
5548; VLX-NEXT:    vzeroupper
5549; VLX-NEXT:    retq
5550;
5551; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5552; NoVLX:       # %bb.0: # %entry
5553; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0
5554; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5555; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5556; NoVLX-NEXT:    kmovw %k0, %eax
5557; NoVLX-NEXT:    vzeroupper
5558; NoVLX-NEXT:    retq
5559entry:
5560  %0 = bitcast <4 x i64> %__a to <16 x i16>
5561  %load = load <4 x i64>, ptr %__b
5562  %1 = bitcast <4 x i64> %load to <16 x i16>
5563  %2 = icmp sgt <16 x i16> %0, %1
  ; Indices 0-15 keep the compare lanes. Indices 16-31 select lanes of the
  ; zeroinitializer operand, so the upper 16 bits of the result are zero.
5564  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5565  %4 = bitcast <32 x i1> %3 to i32
5566  ret i32 %4
5567}
5568
; Masked signed greater-than compare of the 16 x i16 lanes of %__a and %__b. The
; compare result is ANDed with the 16 bits of %__u, widened to 32 mask bits and
; returned as an i32.
; The +avx512bw/+avx512vl run expects %__u moved into %k1 and used as the write
; mask of vpcmpgtw. The +avx512f-only run expects the mask applied with a scalar
; andl after the compare bits reach %eax.
5569define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5570; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5571; VLX:       # %bb.0: # %entry
5572; VLX-NEXT:    kmovd %edi, %k1
5573; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5574; VLX-NEXT:    kmovd %k0, %eax
5575; VLX-NEXT:    vzeroupper
5576; VLX-NEXT:    retq
5577;
5578; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5579; NoVLX:       # %bb.0: # %entry
5580; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5581; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5582; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5583; NoVLX-NEXT:    kmovw %k0, %eax
5584; NoVLX-NEXT:    andl %edi, %eax
5585; NoVLX-NEXT:    vzeroupper
5586; NoVLX-NEXT:    retq
5587entry:
5588  %0 = bitcast <4 x i64> %__a to <16 x i16>
5589  %1 = bitcast <4 x i64> %__b to <16 x i16>
5590  %2 = icmp sgt <16 x i16> %0, %1
5591  %3 = bitcast i16 %__u to <16 x i1>
5592  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 keep the masked compare lanes. Indices 16-31 select lanes of
  ; the zeroinitializer operand, so the upper 16 bits of the result are zero.
5593  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5594  %6 = bitcast <32 x i1> %5 to i32
5595  ret i32 %6
5596}
5597
; Masked signed greater-than compare of the 16 x i16 lanes of %__a against a
; <4 x i64> loaded from %__b. The compare result is ANDed with the 16 bits of
; %__u, widened to 32 mask bits and returned as an i32.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand. The
; +avx512f-only run applies %__u with a scalar andl rather than a mask register.
5598define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5599; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5600; VLX:       # %bb.0: # %entry
5601; VLX-NEXT:    kmovd %edi, %k1
5602; VLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5603; VLX-NEXT:    kmovd %k0, %eax
5604; VLX-NEXT:    vzeroupper
5605; VLX-NEXT:    retq
5606;
5607; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5608; NoVLX:       # %bb.0: # %entry
5609; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm0
5610; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5611; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5612; NoVLX-NEXT:    kmovw %k0, %eax
5613; NoVLX-NEXT:    andl %edi, %eax
5614; NoVLX-NEXT:    vzeroupper
5615; NoVLX-NEXT:    retq
5616entry:
5617  %0 = bitcast <4 x i64> %__a to <16 x i16>
5618  %load = load <4 x i64>, ptr %__b
5619  %1 = bitcast <4 x i64> %load to <16 x i16>
5620  %2 = icmp sgt <16 x i16> %0, %1
5621  %3 = bitcast i16 %__u to <16 x i1>
5622  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 keep the masked compare lanes. Indices 16-31 select lanes of
  ; the zeroinitializer operand, so the upper 16 bits of the result are zero.
5623  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5624  %6 = bitcast <32 x i1> %5 to i32
5625  ret i32 %6
5626}
5627
5628
; Signed greater-than compare of the 16 x i16 lanes of %__a and %__b, with the
; 16-bit result widened to 64 mask bits and returned as an i64.
; The +avx512bw/+avx512vl run expects a ymm vpcmpgtw writing a mask register
; that is read back with kmovq. The +avx512f-only run expects a ymm compare
; followed by vpmovsxwd/vptestmd and a kmovw into %eax.
5629define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5630; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5631; VLX:       # %bb.0: # %entry
5632; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
5633; VLX-NEXT:    kmovq %k0, %rax
5634; VLX-NEXT:    vzeroupper
5635; VLX-NEXT:    retq
5636;
5637; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5638; NoVLX:       # %bb.0: # %entry
5639; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5640; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5641; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5642; NoVLX-NEXT:    kmovw %k0, %eax
5643; NoVLX-NEXT:    vzeroupper
5644; NoVLX-NEXT:    retq
5645entry:
5646  %0 = bitcast <4 x i64> %__a to <16 x i16>
5647  %1 = bitcast <4 x i64> %__b to <16 x i16>
5648  %2 = icmp sgt <16 x i16> %0, %1
  ; Only indices 0-15 read the compare result. Every index >= 16, including the
  ; repeated 16-31 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 16 and up are zero.
5649  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5650  %4 = bitcast <64 x i1> %3 to i64
5651  ret i64 %4
5652}
5653
; Signed greater-than compare of the 16 x i16 lanes of %__a against a <4 x i64>
; loaded from %__b, widened to 64 mask bits and returned as an i64.
; Both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5654define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5655; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5656; VLX:       # %bb.0: # %entry
5657; VLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %k0
5658; VLX-NEXT:    kmovq %k0, %rax
5659; VLX-NEXT:    vzeroupper
5660; VLX-NEXT:    retq
5661;
5662; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5663; NoVLX:       # %bb.0: # %entry
5664; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0
5665; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5666; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5667; NoVLX-NEXT:    kmovw %k0, %eax
5668; NoVLX-NEXT:    vzeroupper
5669; NoVLX-NEXT:    retq
5670entry:
5671  %0 = bitcast <4 x i64> %__a to <16 x i16>
5672  %load = load <4 x i64>, ptr %__b
5673  %1 = bitcast <4 x i64> %load to <16 x i16>
5674  %2 = icmp sgt <16 x i16> %0, %1
  ; Only indices 0-15 read the compare result. Every index >= 16, including the
  ; repeated 16-31 groups, selects a lane of the zeroinitializer operand, so
  ; result bits 16 and up are zero.
5675  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5676  %4 = bitcast <64 x i1> %3 to i64
5677  ret i64 %4
5678}
5679
; Masked signed greater-than compare of the 16 x i16 lanes of %__a and %__b. The
; compare result is ANDed with the 16 bits of %__u, widened to 64 mask bits and
; returned as an i64.
; The +avx512bw/+avx512vl run expects %__u moved into %k1 and used as the write
; mask of vpcmpgtw. The +avx512f-only run expects the mask applied with a scalar
; andl after the compare bits reach %eax.
5680define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5681; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5682; VLX:       # %bb.0: # %entry
5683; VLX-NEXT:    kmovd %edi, %k1
5684; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5685; VLX-NEXT:    kmovq %k0, %rax
5686; VLX-NEXT:    vzeroupper
5687; VLX-NEXT:    retq
5688;
5689; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5690; NoVLX:       # %bb.0: # %entry
5691; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5692; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5693; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5694; NoVLX-NEXT:    kmovw %k0, %eax
5695; NoVLX-NEXT:    andl %edi, %eax
5696; NoVLX-NEXT:    vzeroupper
5697; NoVLX-NEXT:    retq
5698entry:
5699  %0 = bitcast <4 x i64> %__a to <16 x i16>
5700  %1 = bitcast <4 x i64> %__b to <16 x i16>
5701  %2 = icmp sgt <16 x i16> %0, %1
5702  %3 = bitcast i16 %__u to <16 x i1>
5703  %4 = and <16 x i1> %2, %3
  ; Only indices 0-15 read the masked compare result. Every index >= 16,
  ; including the repeated 16-31 groups, selects a lane of the zeroinitializer
  ; operand, so result bits 16 and up are zero.
5704  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5705  %6 = bitcast <64 x i1> %5 to i64
5706  ret i64 %6
5707}
5708
; Masked signed greater-than compare of the 16 x i16 lanes of %__a against a
; <4 x i64> loaded from %__b. The compare result is ANDed with the 16 bits of
; %__u, widened to 64 mask bits and returned as an i64.
; Both runs expect the load folded into vpcmpgtw as a (%rsi) operand. The
; +avx512f-only run applies %__u with a scalar andl rather than a mask register.
5709define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5710; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5711; VLX:       # %bb.0: # %entry
5712; VLX-NEXT:    kmovd %edi, %k1
5713; VLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5714; VLX-NEXT:    kmovq %k0, %rax
5715; VLX-NEXT:    vzeroupper
5716; VLX-NEXT:    retq
5717;
5718; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5719; NoVLX:       # %bb.0: # %entry
5720; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm0
5721; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5722; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5723; NoVLX-NEXT:    kmovw %k0, %eax
5724; NoVLX-NEXT:    andl %edi, %eax
5725; NoVLX-NEXT:    vzeroupper
5726; NoVLX-NEXT:    retq
5727entry:
5728  %0 = bitcast <4 x i64> %__a to <16 x i16>
5729  %load = load <4 x i64>, ptr %__b
5730  %1 = bitcast <4 x i64> %load to <16 x i16>
5731  %2 = icmp sgt <16 x i16> %0, %1
5732  %3 = bitcast i16 %__u to <16 x i1>
5733  %4 = and <16 x i1> %2, %3
  ; Only indices 0-15 read the masked compare result. Every index >= 16,
  ; including the repeated 16-31 groups, selects a lane of the zeroinitializer
  ; operand, so result bits 16 and up are zero.
5734  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5735  %6 = bitcast <64 x i1> %5 to i64
5736  ret i64 %6
5737}
5738
5739
; Signed greater-than compare of the 32 x i16 lanes of %__a and %__b, with the
; 32-bit result widened to 64 mask bits and returned as an i64.
; The +avx512bw/+avx512vl run expects a single zmm vpcmpgtw writing a mask
; register. The +avx512f-only run expects the compare split into two ymm halves
; (the upper ones obtained with vextracti64x4), each converted with
; vpmovsxwd/vptestmd, and the two 16-bit results joined with shll/orl.
5740define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5741; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5742; VLX:       # %bb.0: # %entry
5743; VLX-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
5744; VLX-NEXT:    kmovq %k0, %rax
5745; VLX-NEXT:    vzeroupper
5746; VLX-NEXT:    retq
5747;
5748; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5749; NoVLX:       # %bb.0: # %entry
5750; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm2
5751; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
5752; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
5753; NoVLX-NEXT:    kmovw %k0, %ecx
5754; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
5755; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5756; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5757; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5758; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5759; NoVLX-NEXT:    kmovw %k0, %eax
5760; NoVLX-NEXT:    shll $16, %eax
5761; NoVLX-NEXT:    orl %ecx, %eax
5762; NoVLX-NEXT:    vzeroupper
5763; NoVLX-NEXT:    retq
5764entry:
5765  %0 = bitcast <8 x i64> %__a to <32 x i16>
5766  %1 = bitcast <8 x i64> %__b to <32 x i16>
5767  %2 = icmp sgt <32 x i16> %0, %1
  ; Indices 0-31 keep the compare lanes. Indices 32-63 select lanes of the
  ; zeroinitializer operand, so the upper 32 bits of the result are zero.
5768  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5769  %4 = bitcast <64 x i1> %3 to i64
5770  ret i64 %4
5771}
5772
; Signed greater-than compare of the 32 x i16 lanes of %__a against an
; <8 x i64> loaded from %__b, widened to 64 mask bits and returned as an i64.
; The +avx512bw/+avx512vl run expects a single zmm vpcmpgtw with a (%rdi)
; operand. The +avx512f-only run expects two ymm compares that read (%rdi) and
; 32(%rdi), each converted with vpmovsxwd/vptestmd and joined with shll/orl.
5773define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
5774; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5775; VLX:       # %bb.0: # %entry
5776; VLX-NEXT:    vpcmpgtw (%rdi), %zmm0, %k0
5777; VLX-NEXT:    kmovq %k0, %rax
5778; VLX-NEXT:    vzeroupper
5779; VLX-NEXT:    retq
5780;
5781; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5782; NoVLX:       # %bb.0: # %entry
5783; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm1
5784; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
5785; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5786; NoVLX-NEXT:    kmovw %k0, %ecx
5787; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5788; NoVLX-NEXT:    vpcmpgtw 32(%rdi), %ymm0, %ymm0
5789; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5790; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5791; NoVLX-NEXT:    kmovw %k0, %eax
5792; NoVLX-NEXT:    shll $16, %eax
5793; NoVLX-NEXT:    orl %ecx, %eax
5794; NoVLX-NEXT:    vzeroupper
5795; NoVLX-NEXT:    retq
5796entry:
5797  %0 = bitcast <8 x i64> %__a to <32 x i16>
5798  %load = load <8 x i64>, ptr %__b
5799  %1 = bitcast <8 x i64> %load to <32 x i16>
5800  %2 = icmp sgt <32 x i16> %0, %1
  ; Indices 0-31 keep the compare lanes. Indices 32-63 select lanes of the
  ; zeroinitializer operand, so the upper 32 bits of the result are zero.
5801  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5802  %4 = bitcast <64 x i1> %3 to i64
5803  ret i64 %4
5804}
5805
; Masked form: the <32 x i16> signed greater-than result of %__a vs %__b is
; ANDed with %__u (bitcast to <32 x i1>), then widened to <64 x i1> with
; zeroinitializer lanes in positions 32-63 and returned as i64.
; With AVX512BW the expected assembly applies %__u as a {%k1} write-mask on the
; compare; the +avx512f-only run ANDs each 16-bit half with the matching half
; of %edi in general-purpose registers.
5806define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5807; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5808; VLX:       # %bb.0: # %entry
5809; VLX-NEXT:    kmovd %edi, %k1
5810; VLX-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5811; VLX-NEXT:    kmovq %k0, %rax
5812; VLX-NEXT:    vzeroupper
5813; VLX-NEXT:    retq
5814;
5815; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5816; NoVLX:       # %bb.0: # %entry
5817; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm2
5818; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
5819; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
5820; NoVLX-NEXT:    kmovw %k0, %eax
5821; NoVLX-NEXT:    andl %edi, %eax
5822; NoVLX-NEXT:    shrl $16, %edi
5823; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
5824; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5825; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5826; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5827; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5828; NoVLX-NEXT:    kmovw %k0, %ecx
5829; NoVLX-NEXT:    andl %edi, %ecx
5830; NoVLX-NEXT:    shll $16, %ecx
5831; NoVLX-NEXT:    movzwl %ax, %eax
5832; NoVLX-NEXT:    orl %ecx, %eax
5833; NoVLX-NEXT:    vzeroupper
5834; NoVLX-NEXT:    retq
5835entry:
5836  %0 = bitcast <8 x i64> %__a to <32 x i16>
5837  %1 = bitcast <8 x i64> %__b to <32 x i16>
5838  %2 = icmp sgt <32 x i16> %0, %1
5839  %3 = bitcast i32 %__u to <32 x i1>
5840  %4 = and <32 x i1> %2, %3
5841  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5842  %6 = bitcast <64 x i1> %5 to i64
5843  ret i64 %6
5844}
5845
; Masked, memory-operand form: %__a (as <32 x i16>) is compared signed
; greater-than against a 512-bit vector loaded from %__b, the result is ANDed
; with %__u (bitcast to <32 x i1>), widened to <64 x i1> with zeroinitializer
; lanes in positions 32-63, and returned as i64.
; The expected assembly folds the load into vpcmpgtw in both configurations;
; the +avx512f-only run handles the two 256-bit halves separately.
5846define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
5847; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5848; VLX:       # %bb.0: # %entry
5849; VLX-NEXT:    kmovd %edi, %k1
5850; VLX-NEXT:    vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5851; VLX-NEXT:    kmovq %k0, %rax
5852; VLX-NEXT:    vzeroupper
5853; VLX-NEXT:    retq
5854;
5855; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5856; NoVLX:       # %bb.0: # %entry
5857; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm1
5858; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
5859; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5860; NoVLX-NEXT:    kmovw %k0, %eax
5861; NoVLX-NEXT:    andl %edi, %eax
5862; NoVLX-NEXT:    shrl $16, %edi
5863; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5864; NoVLX-NEXT:    vpcmpgtw 32(%rsi), %ymm0, %ymm0
5865; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5866; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5867; NoVLX-NEXT:    kmovw %k0, %ecx
5868; NoVLX-NEXT:    andl %edi, %ecx
5869; NoVLX-NEXT:    shll $16, %ecx
5870; NoVLX-NEXT:    movzwl %ax, %eax
5871; NoVLX-NEXT:    orl %ecx, %eax
5872; NoVLX-NEXT:    vzeroupper
5873; NoVLX-NEXT:    retq
5874entry:
5875  %0 = bitcast <8 x i64> %__a to <32 x i16>
5876  %load = load <8 x i64>, ptr %__b
5877  %1 = bitcast <8 x i64> %load to <32 x i16>
5878  %2 = icmp sgt <32 x i16> %0, %1
5879  %3 = bitcast i32 %__u to <32 x i1>
5880  %4 = and <32 x i1> %2, %3
5881  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5882  %6 = bitcast <64 x i1> %5 to i64
5883  ret i64 %6
5884}
5885
5886
; Signed greater-than compare of two 128-bit vectors viewed as <4 x i32>. The
; <4 x i1> result is padded to <8 x i1> with zeroinitializer lanes (shuffle
; indices 4-7) and returned as i8.
; Without AVX512VL the expected assembly runs the compare on zmm registers and
; uses a kshiftlw/kshiftrw $12 pair so only the low four mask bits survive.
5887define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5888; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5889; VLX:       # %bb.0: # %entry
5890; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
5891; VLX-NEXT:    kmovd %k0, %eax
5892; VLX-NEXT:    # kill: def $al killed $al killed $eax
5893; VLX-NEXT:    retq
5894;
5895; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5896; NoVLX:       # %bb.0: # %entry
5897; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
5898; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5899; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
5900; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5901; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5902; NoVLX-NEXT:    kmovw %k0, %eax
5903; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5904; NoVLX-NEXT:    vzeroupper
5905; NoVLX-NEXT:    retq
5906entry:
5907  %0 = bitcast <2 x i64> %__a to <4 x i32>
5908  %1 = bitcast <2 x i64> %__b to <4 x i32>
5909  %2 = icmp sgt <4 x i32> %0, %1
5910  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5911  %4 = bitcast <8 x i1> %3 to i8
5912  ret i8 %4
5913}
5914
; Memory-operand form: %__a (as <4 x i32>) is compared signed greater-than
; against a 128-bit vector loaded from %__b. The <4 x i1> result is padded to
; <8 x i1> with zeroinitializer lanes (shuffle indices 4-7) and returned as i8.
; With AVX512VL the load is folded into vpcmpgtd; the +avx512f-only run loads
; into xmm1 with vmovdqa first and then compares on zmm registers.
5915define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5916; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5917; VLX:       # %bb.0: # %entry
5918; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
5919; VLX-NEXT:    kmovd %k0, %eax
5920; VLX-NEXT:    # kill: def $al killed $al killed $eax
5921; VLX-NEXT:    retq
5922;
5923; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5924; NoVLX:       # %bb.0: # %entry
5925; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5926; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
5927; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
5928; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5929; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5930; NoVLX-NEXT:    kmovw %k0, %eax
5931; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5932; NoVLX-NEXT:    vzeroupper
5933; NoVLX-NEXT:    retq
5934entry:
5935  %0 = bitcast <2 x i64> %__a to <4 x i32>
5936  %load = load <2 x i64>, ptr %__b
5937  %1 = bitcast <2 x i64> %load to <4 x i32>
5938  %2 = icmp sgt <4 x i32> %0, %1
5939  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5940  %4 = bitcast <8 x i1> %3 to i8
5941  ret i8 %4
5942}
5943
; Masked form: the low four lanes of %__u (bitcast to <8 x i1>) are extracted
; and ANDed with the <4 x i32> signed greater-than result of %__a vs %__b. The
; masked <4 x i1> is padded to <8 x i1> with zeroinitializer lanes and returned
; as i8. The expected assembly applies %__u as a {%k1} write-mask on vpcmpgtd.
5944define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5945; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5946; VLX:       # %bb.0: # %entry
5947; VLX-NEXT:    kmovd %edi, %k1
5948; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
5949; VLX-NEXT:    kmovd %k0, %eax
5950; VLX-NEXT:    # kill: def $al killed $al killed $eax
5951; VLX-NEXT:    retq
5952;
5953; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5954; NoVLX:       # %bb.0: # %entry
5955; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
5956; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5957; NoVLX-NEXT:    kmovw %edi, %k1
5958; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5959; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5960; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5961; NoVLX-NEXT:    kmovw %k0, %eax
5962; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5963; NoVLX-NEXT:    vzeroupper
5964; NoVLX-NEXT:    retq
5965entry:
5966  %0 = bitcast <2 x i64> %__a to <4 x i32>
5967  %1 = bitcast <2 x i64> %__b to <4 x i32>
5968  %2 = icmp sgt <4 x i32> %0, %1
5969  %3 = bitcast i8 %__u to <8 x i1>
5970  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5971  %4 = and <4 x i1> %2, %extract.i
5972  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5973  %6 = bitcast <8 x i1> %5 to i8
5974  ret i8 %6
5975}
5976
; Masked, memory-operand form: %__a (as <4 x i32>) is compared signed
; greater-than against a 128-bit vector loaded from %__b, the result is ANDed
; with the low four lanes of %__u (bitcast to <8 x i1>), padded to <8 x i1>
; with zeroinitializer lanes, and returned as i8.
; With AVX512VL the load is folded into the masked vpcmpgtd; the +avx512f-only
; run loads into xmm1 with vmovdqa before the masked zmm compare.
5977define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5978; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5979; VLX:       # %bb.0: # %entry
5980; VLX-NEXT:    kmovd %edi, %k1
5981; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
5982; VLX-NEXT:    kmovd %k0, %eax
5983; VLX-NEXT:    # kill: def $al killed $al killed $eax
5984; VLX-NEXT:    retq
5985;
5986; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5987; NoVLX:       # %bb.0: # %entry
5988; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5989; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
5990; NoVLX-NEXT:    kmovw %edi, %k1
5991; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5992; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5993; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5994; NoVLX-NEXT:    kmovw %k0, %eax
5995; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5996; NoVLX-NEXT:    vzeroupper
5997; NoVLX-NEXT:    retq
5998entry:
5999  %0 = bitcast <2 x i64> %__a to <4 x i32>
6000  %load = load <2 x i64>, ptr %__b
6001  %1 = bitcast <2 x i64> %load to <4 x i32>
6002  %2 = icmp sgt <4 x i32> %0, %1
6003  %3 = bitcast i8 %__u to <8 x i1>
6004  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6005  %4 = and <4 x i1> %2, %extract.i
6006  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6007  %6 = bitcast <8 x i1> %5 to i8
6008  ret i8 %6
6009}
6010
6011
; Broadcast form: a scalar i32 is loaded from %__b and splatted to all four
; lanes (insertelement into lane 0, then a shuffle with an all-zero mask) before
; the signed greater-than compare against %__a (as <4 x i32>). The <4 x i1>
; result is padded to <8 x i1> with zeroinitializer lanes and returned as i8.
; The expected assembly uses an embedded-broadcast memory operand on vpcmpgtd
; ({1to4} on xmm, {1to16} on zmm).
6012define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6013; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6014; VLX:       # %bb.0: # %entry
6015; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6016; VLX-NEXT:    kmovd %k0, %eax
6017; VLX-NEXT:    # kill: def $al killed $al killed $eax
6018; VLX-NEXT:    retq
6019;
6020; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6021; NoVLX:       # %bb.0: # %entry
6022; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6023; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6024; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6025; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6026; NoVLX-NEXT:    kmovw %k0, %eax
6027; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6028; NoVLX-NEXT:    vzeroupper
6029; NoVLX-NEXT:    retq
6030entry:
6031  %0 = bitcast <2 x i64> %__a to <4 x i32>
6032  %load = load i32, ptr %__b
6033  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6034  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6035  %2 = icmp sgt <4 x i32> %0, %1
6036  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6037  %4 = bitcast <8 x i1> %3 to i8
6038  ret i8 %4
6039}
6040
; Masked broadcast form: a scalar i32 loaded from %__b is splatted to four
; lanes and compared (signed greater-than) against %__a as <4 x i32>. The low
; four lanes of %__u (bitcast to <8 x i1>) are ANDed with the result, which is
; then padded to <8 x i1> with zeroinitializer lanes and returned as i8.
; The expected assembly combines the {%k1} write-mask with an
; embedded-broadcast memory operand on vpcmpgtd.
6041define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6042; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6043; VLX:       # %bb.0: # %entry
6044; VLX-NEXT:    kmovd %edi, %k1
6045; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6046; VLX-NEXT:    kmovd %k0, %eax
6047; VLX-NEXT:    # kill: def $al killed $al killed $eax
6048; VLX-NEXT:    retq
6049;
6050; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6051; NoVLX:       # %bb.0: # %entry
6052; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6053; NoVLX-NEXT:    kmovw %edi, %k1
6054; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6055; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6056; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6057; NoVLX-NEXT:    kmovw %k0, %eax
6058; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6059; NoVLX-NEXT:    vzeroupper
6060; NoVLX-NEXT:    retq
6061entry:
6062  %0 = bitcast <2 x i64> %__a to <4 x i32>
6063  %load = load i32, ptr %__b
6064  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6065  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6066  %2 = icmp sgt <4 x i32> %0, %1
6067  %3 = bitcast i8 %__u to <8 x i1>
6068  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6069  %4 = and <4 x i1> %extract.i, %2
6070  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6071  %6 = bitcast <8 x i1> %5 to i8
6072  ret i8 %6
6073}
6074
6075
; Signed greater-than compare of two 128-bit vectors viewed as <4 x i32>, with
; the <4 x i1> result widened to <16 x i1> and returned as i16. Shuffle indices
; 0-3 pick the compare lanes; every other index (4-7, repeated) picks a lane of
; the zeroinitializer operand, so result bits 4-15 are zero.
6076define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6077; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6078; VLX:       # %bb.0: # %entry
6079; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6080; VLX-NEXT:    kmovd %k0, %eax
6081; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6082; VLX-NEXT:    retq
6083;
6084; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6085; NoVLX:       # %bb.0: # %entry
6086; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6087; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6088; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6089; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6090; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6091; NoVLX-NEXT:    kmovw %k0, %eax
6092; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6093; NoVLX-NEXT:    vzeroupper
6094; NoVLX-NEXT:    retq
6095entry:
6096  %0 = bitcast <2 x i64> %__a to <4 x i32>
6097  %1 = bitcast <2 x i64> %__b to <4 x i32>
6098  %2 = icmp sgt <4 x i32> %0, %1
6099  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6100  %4 = bitcast <16 x i1> %3 to i16
6101  ret i16 %4
6102}
6103
; Memory-operand form: %__a (as <4 x i32>) is compared signed greater-than
; against a 128-bit vector loaded from %__b. The <4 x i1> result is widened to
; <16 x i1> (lanes 4-15 come from the zeroinitializer operand) and returned as
; i16. With AVX512VL the load is folded into vpcmpgtd; the +avx512f-only run
; loads into xmm1 with vmovdqa and compares on zmm registers.
6104define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6105; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6106; VLX:       # %bb.0: # %entry
6107; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6108; VLX-NEXT:    kmovd %k0, %eax
6109; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6110; VLX-NEXT:    retq
6111;
6112; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6113; NoVLX:       # %bb.0: # %entry
6114; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6115; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6116; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6117; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6118; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6119; NoVLX-NEXT:    kmovw %k0, %eax
6120; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6121; NoVLX-NEXT:    vzeroupper
6122; NoVLX-NEXT:    retq
6123entry:
6124  %0 = bitcast <2 x i64> %__a to <4 x i32>
6125  %load = load <2 x i64>, ptr %__b
6126  %1 = bitcast <2 x i64> %load to <4 x i32>
6127  %2 = icmp sgt <4 x i32> %0, %1
6128  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6129  %4 = bitcast <16 x i1> %3 to i16
6130  ret i16 %4
6131}
6132
; Masked form: the low four lanes of %__u (bitcast to <8 x i1>) are ANDed with
; the <4 x i32> signed greater-than result of %__a vs %__b. The masked <4 x i1>
; is widened to <16 x i1> (lanes 4-15 come from the zeroinitializer operand)
; and returned as i16. The expected assembly applies %__u as a {%k1} write-mask.
6133define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6134; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6135; VLX:       # %bb.0: # %entry
6136; VLX-NEXT:    kmovd %edi, %k1
6137; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6138; VLX-NEXT:    kmovd %k0, %eax
6139; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6140; VLX-NEXT:    retq
6141;
6142; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6143; NoVLX:       # %bb.0: # %entry
6144; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6145; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6146; NoVLX-NEXT:    kmovw %edi, %k1
6147; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6148; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6149; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6150; NoVLX-NEXT:    kmovw %k0, %eax
6151; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6152; NoVLX-NEXT:    vzeroupper
6153; NoVLX-NEXT:    retq
6154entry:
6155  %0 = bitcast <2 x i64> %__a to <4 x i32>
6156  %1 = bitcast <2 x i64> %__b to <4 x i32>
6157  %2 = icmp sgt <4 x i32> %0, %1
6158  %3 = bitcast i8 %__u to <8 x i1>
6159  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6160  %4 = and <4 x i1> %2, %extract.i
6161  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6162  %6 = bitcast <16 x i1> %5 to i16
6163  ret i16 %6
6164}
6165
; Masked, memory-operand form: %__a (as <4 x i32>) is compared signed
; greater-than against a 128-bit vector loaded from %__b, ANDed with the low
; four lanes of %__u (bitcast to <8 x i1>), widened to <16 x i1> (lanes 4-15
; come from the zeroinitializer operand), and returned as i16.
; With AVX512VL the load is folded into the masked vpcmpgtd; the +avx512f-only
; run loads into xmm1 with vmovdqa before the masked zmm compare.
6166define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6167; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6168; VLX:       # %bb.0: # %entry
6169; VLX-NEXT:    kmovd %edi, %k1
6170; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6171; VLX-NEXT:    kmovd %k0, %eax
6172; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6173; VLX-NEXT:    retq
6174;
6175; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6176; NoVLX:       # %bb.0: # %entry
6177; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6178; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6179; NoVLX-NEXT:    kmovw %edi, %k1
6180; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6181; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6182; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6183; NoVLX-NEXT:    kmovw %k0, %eax
6184; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6185; NoVLX-NEXT:    vzeroupper
6186; NoVLX-NEXT:    retq
6187entry:
6188  %0 = bitcast <2 x i64> %__a to <4 x i32>
6189  %load = load <2 x i64>, ptr %__b
6190  %1 = bitcast <2 x i64> %load to <4 x i32>
6191  %2 = icmp sgt <4 x i32> %0, %1
6192  %3 = bitcast i8 %__u to <8 x i1>
6193  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6194  %4 = and <4 x i1> %2, %extract.i
6195  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6196  %6 = bitcast <16 x i1> %5 to i16
6197  ret i16 %6
6198}
6199
6200
; Broadcast form: a scalar i32 loaded from %__b is splatted to four lanes
; (insertelement into lane 0, then a shuffle with an all-zero mask) and compared
; signed greater-than against %__a as <4 x i32>. The <4 x i1> result is widened
; to <16 x i1> (lanes 4-15 come from the zeroinitializer operand) and returned
; as i16. The expected assembly uses an embedded-broadcast memory operand.
6201define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6202; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6203; VLX:       # %bb.0: # %entry
6204; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6205; VLX-NEXT:    kmovd %k0, %eax
6206; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6207; VLX-NEXT:    retq
6208;
6209; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6210; NoVLX:       # %bb.0: # %entry
6211; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6212; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6213; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6214; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6215; NoVLX-NEXT:    kmovw %k0, %eax
6216; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6217; NoVLX-NEXT:    vzeroupper
6218; NoVLX-NEXT:    retq
6219entry:
6220  %0 = bitcast <2 x i64> %__a to <4 x i32>
6221  %load = load i32, ptr %__b
6222  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6223  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6224  %2 = icmp sgt <4 x i32> %0, %1
6225  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6226  %4 = bitcast <16 x i1> %3 to i16
6227  ret i16 %4
6228}
6229
; Masked broadcast form: a scalar i32 loaded from %__b is splatted to four
; lanes and compared (signed greater-than) against %__a as <4 x i32>. The low
; four lanes of %__u (bitcast to <8 x i1>) are ANDed with the result, which is
; widened to <16 x i1> (lanes 4-15 come from the zeroinitializer operand) and
; returned as i16. The expected assembly combines the {%k1} write-mask with an
; embedded-broadcast memory operand on vpcmpgtd.
6230define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6231; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6232; VLX:       # %bb.0: # %entry
6233; VLX-NEXT:    kmovd %edi, %k1
6234; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6235; VLX-NEXT:    kmovd %k0, %eax
6236; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6237; VLX-NEXT:    retq
6238;
6239; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6240; NoVLX:       # %bb.0: # %entry
6241; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6242; NoVLX-NEXT:    kmovw %edi, %k1
6243; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6244; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6245; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6246; NoVLX-NEXT:    kmovw %k0, %eax
6247; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6248; NoVLX-NEXT:    vzeroupper
6249; NoVLX-NEXT:    retq
6250entry:
6251  %0 = bitcast <2 x i64> %__a to <4 x i32>
6252  %load = load i32, ptr %__b
6253  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6254  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6255  %2 = icmp sgt <4 x i32> %0, %1
6256  %3 = bitcast i8 %__u to <8 x i1>
6257  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6258  %4 = and <4 x i1> %extract.i, %2
6259  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6260  %6 = bitcast <16 x i1> %5 to i16
6261  ret i16 %6
6262}
6263
6264
; Signed greater-than compare of two 128-bit vectors viewed as <4 x i32>, with
; the <4 x i1> result widened to <32 x i1> and returned as i32. Shuffle indices
; 0-3 pick the compare lanes; every other index (4-7, repeated) picks a lane of
; the zeroinitializer operand, so result bits 4-31 are zero.
6265define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6266; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6267; VLX:       # %bb.0: # %entry
6268; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6269; VLX-NEXT:    kmovd %k0, %eax
6270; VLX-NEXT:    retq
6271;
6272; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6273; NoVLX:       # %bb.0: # %entry
6274; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6275; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6276; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6277; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6278; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6279; NoVLX-NEXT:    kmovw %k0, %eax
6280; NoVLX-NEXT:    vzeroupper
6281; NoVLX-NEXT:    retq
6282entry:
6283  %0 = bitcast <2 x i64> %__a to <4 x i32>
6284  %1 = bitcast <2 x i64> %__b to <4 x i32>
6285  %2 = icmp sgt <4 x i32> %0, %1
6286  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6287  %4 = bitcast <32 x i1> %3 to i32
6288  ret i32 %4
6289}
6290
; Memory-operand form: %__a (as <4 x i32>) is compared signed greater-than
; against a 128-bit vector loaded from %__b. The <4 x i1> result is widened to
; <32 x i1> (lanes 4-31 come from the zeroinitializer operand) and returned as
; i32. With AVX512VL the load is folded into vpcmpgtd; the +avx512f-only run
; loads into xmm1 with vmovdqa and compares on zmm registers.
6291define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6292; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6293; VLX:       # %bb.0: # %entry
6294; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6295; VLX-NEXT:    kmovd %k0, %eax
6296; VLX-NEXT:    retq
6297;
6298; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6299; NoVLX:       # %bb.0: # %entry
6300; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6301; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6302; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6303; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6304; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6305; NoVLX-NEXT:    kmovw %k0, %eax
6306; NoVLX-NEXT:    vzeroupper
6307; NoVLX-NEXT:    retq
6308entry:
6309  %0 = bitcast <2 x i64> %__a to <4 x i32>
6310  %load = load <2 x i64>, ptr %__b
6311  %1 = bitcast <2 x i64> %load to <4 x i32>
6312  %2 = icmp sgt <4 x i32> %0, %1
6313  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6314  %4 = bitcast <32 x i1> %3 to i32
6315  ret i32 %4
6316}
6317
; Masked form: the low four lanes of %__u (bitcast to <8 x i1>) are ANDed with
; the <4 x i32> signed greater-than result of %__a vs %__b. The masked <4 x i1>
; is widened to <32 x i1> (lanes 4-31 come from the zeroinitializer operand)
; and returned as i32. The expected assembly applies %__u as a {%k1} write-mask.
6318define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6319; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6320; VLX:       # %bb.0: # %entry
6321; VLX-NEXT:    kmovd %edi, %k1
6322; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6323; VLX-NEXT:    kmovd %k0, %eax
6324; VLX-NEXT:    retq
6325;
6326; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6327; NoVLX:       # %bb.0: # %entry
6328; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6329; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6330; NoVLX-NEXT:    kmovw %edi, %k1
6331; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6332; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6333; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6334; NoVLX-NEXT:    kmovw %k0, %eax
6335; NoVLX-NEXT:    vzeroupper
6336; NoVLX-NEXT:    retq
6337entry:
6338  %0 = bitcast <2 x i64> %__a to <4 x i32>
6339  %1 = bitcast <2 x i64> %__b to <4 x i32>
6340  %2 = icmp sgt <4 x i32> %0, %1
6341  %3 = bitcast i8 %__u to <8 x i1>
6342  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6343  %4 = and <4 x i1> %2, %extract.i
6344  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6345  %6 = bitcast <32 x i1> %5 to i32
6346  ret i32 %6
6347}
6348
; Masked, memory-operand form: %__a (as <4 x i32>) is compared signed
; greater-than against a 128-bit vector loaded from %__b, ANDed with the low
; four lanes of %__u (bitcast to <8 x i1>), widened to <32 x i1> (lanes 4-31
; come from the zeroinitializer operand), and returned as i32.
; With AVX512VL the load is folded into the masked vpcmpgtd; the +avx512f-only
; run loads into xmm1 with vmovdqa before the masked zmm compare.
6349define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6350; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6351; VLX:       # %bb.0: # %entry
6352; VLX-NEXT:    kmovd %edi, %k1
6353; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6354; VLX-NEXT:    kmovd %k0, %eax
6355; VLX-NEXT:    retq
6356;
6357; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6358; NoVLX:       # %bb.0: # %entry
6359; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6360; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6361; NoVLX-NEXT:    kmovw %edi, %k1
6362; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6363; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6364; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6365; NoVLX-NEXT:    kmovw %k0, %eax
6366; NoVLX-NEXT:    vzeroupper
6367; NoVLX-NEXT:    retq
6368entry:
6369  %0 = bitcast <2 x i64> %__a to <4 x i32>
6370  %load = load <2 x i64>, ptr %__b
6371  %1 = bitcast <2 x i64> %load to <4 x i32>
6372  %2 = icmp sgt <4 x i32> %0, %1
6373  %3 = bitcast i8 %__u to <8 x i1>
6374  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6375  %4 = and <4 x i1> %2, %extract.i
6376  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6377  %6 = bitcast <32 x i1> %5 to i32
6378  ret i32 %6
6379}
6380
6381
; Broadcast form: a scalar i32 loaded from %__b is splatted to four lanes
; (insertelement into lane 0, then a shuffle with an all-zero mask) and compared
; signed greater-than against %__a as <4 x i32>. The <4 x i1> result is widened
; to <32 x i1> (lanes 4-31 come from the zeroinitializer operand) and returned
; as i32. The expected assembly uses an embedded-broadcast memory operand.
6382define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6383; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6384; VLX:       # %bb.0: # %entry
6385; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6386; VLX-NEXT:    kmovd %k0, %eax
6387; VLX-NEXT:    retq
6388;
6389; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6390; NoVLX:       # %bb.0: # %entry
6391; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6392; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6393; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6394; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6395; NoVLX-NEXT:    kmovw %k0, %eax
6396; NoVLX-NEXT:    vzeroupper
6397; NoVLX-NEXT:    retq
6398entry:
6399  %0 = bitcast <2 x i64> %__a to <4 x i32>
6400  %load = load i32, ptr %__b
6401  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6402  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6403  %2 = icmp sgt <4 x i32> %0, %1
6404  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6405  %4 = bitcast <32 x i1> %3 to i32
6406  ret i32 %4
6407}
6408
; Masked broadcast form: a scalar i32 loaded from %__b is splatted to four
; lanes and compared (signed greater-than) against %__a as <4 x i32>. The low
; four lanes of %__u (bitcast to <8 x i1>) are ANDed with the result, which is
; widened to <32 x i1> (lanes 4-31 come from the zeroinitializer operand) and
; returned as i32. The expected assembly combines the {%k1} write-mask with an
; embedded-broadcast memory operand on vpcmpgtd.
6409define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6410; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6411; VLX:       # %bb.0: # %entry
6412; VLX-NEXT:    kmovd %edi, %k1
6413; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6414; VLX-NEXT:    kmovd %k0, %eax
6415; VLX-NEXT:    retq
6416;
6417; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6418; NoVLX:       # %bb.0: # %entry
6419; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6420; NoVLX-NEXT:    kmovw %edi, %k1
6421; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6422; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6423; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6424; NoVLX-NEXT:    kmovw %k0, %eax
6425; NoVLX-NEXT:    vzeroupper
6426; NoVLX-NEXT:    retq
6427entry:
6428  %0 = bitcast <2 x i64> %__a to <4 x i32>
6429  %load = load i32, ptr %__b
6430  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6431  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6432  %2 = icmp sgt <4 x i32> %0, %1
6433  %3 = bitcast i8 %__u to <8 x i1>
6434  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6435  %4 = and <4 x i1> %extract.i, %2
6436  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6437  %6 = bitcast <32 x i1> %5 to i32
6438  ret i32 %6
6439}
6440
6441
; Signed greater-than compare of two 128-bit vectors viewed as <4 x i32>, with
; the <4 x i1> result widened to <64 x i1> and returned as i64. Shuffle indices
; 0-3 pick the compare lanes; every other index (4-7, repeated) picks a lane of
; the zeroinitializer operand, so result bits 4-63 are zero.
; The expected assembly reads the mask with kmovq into %rax when AVX512BW is
; available and with kmovw into %eax in the +avx512f-only run.
6442define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6443; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6444; VLX:       # %bb.0: # %entry
6445; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6446; VLX-NEXT:    kmovq %k0, %rax
6447; VLX-NEXT:    retq
6448;
6449; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6450; NoVLX:       # %bb.0: # %entry
6451; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6452; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6453; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6454; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6455; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6456; NoVLX-NEXT:    kmovw %k0, %eax
6457; NoVLX-NEXT:    vzeroupper
6458; NoVLX-NEXT:    retq
6459entry:
6460  %0 = bitcast <2 x i64> %__a to <4 x i32>
6461  %1 = bitcast <2 x i64> %__b to <4 x i32>
6462  %2 = icmp sgt <4 x i32> %0, %1
6463  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6464  %4 = bitcast <64 x i1> %3 to i64
6465  ret i64 %4
6466}
6467
; Memory-operand form: %__a (as <4 x i32>) is compared signed greater-than
; against a 128-bit vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> (lanes 4-63 come from the zeroinitializer operand) and returned as
; i64. With AVX512VL the load is folded into vpcmpgtd; the +avx512f-only run
; loads into xmm1 with vmovdqa and compares on zmm registers.
6468define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6469; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6470; VLX:       # %bb.0: # %entry
6471; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6472; VLX-NEXT:    kmovq %k0, %rax
6473; VLX-NEXT:    retq
6474;
6475; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6476; NoVLX:       # %bb.0: # %entry
6477; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6478; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6479; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6480; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6481; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6482; NoVLX-NEXT:    kmovw %k0, %eax
6483; NoVLX-NEXT:    vzeroupper
6484; NoVLX-NEXT:    retq
6485entry:
6486  %0 = bitcast <2 x i64> %__a to <4 x i32>
6487  %load = load <2 x i64>, ptr %__b
6488  %1 = bitcast <2 x i64> %load to <4 x i32>
6489  %2 = icmp sgt <4 x i32> %0, %1
6490  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6491  %4 = bitcast <64 x i1> %3 to i64
6492  ret i64 %4
6493}
6494
; Masked register-register form: the <4 x i1> result of icmp sgt is ANDed with
; lanes 0-3 of %__u (i8 bitcast to <8 x i1>, low four lanes extracted), then
; zero-extended to <64 x i1> (shuffle indices 4-7 pick zeroinitializer lanes)
; and returned as i64. Expected code moves %edi into %k1 and uses it as the
; write-mask of vpcmpgtd; the AVX512F-only path additionally widens to zmm and
; clears the upper mask bits with kshiftlw/kshiftrw $12.
6495define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6496; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6497; VLX:       # %bb.0: # %entry
6498; VLX-NEXT:    kmovd %edi, %k1
6499; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6500; VLX-NEXT:    kmovq %k0, %rax
6501; VLX-NEXT:    retq
6502;
6503; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6504; NoVLX:       # %bb.0: # %entry
6505; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6506; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6507; NoVLX-NEXT:    kmovw %edi, %k1
6508; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6509; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6510; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6511; NoVLX-NEXT:    kmovw %k0, %eax
6512; NoVLX-NEXT:    vzeroupper
6513; NoVLX-NEXT:    retq
6514entry:
6515  %0 = bitcast <2 x i64> %__a to <4 x i32>
6516  %1 = bitcast <2 x i64> %__b to <4 x i32>
6517  %2 = icmp sgt <4 x i32> %0, %1
6518  %3 = bitcast i8 %__u to <8 x i1>
6519  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6520  %4 = and <4 x i1> %2, %extract.i
6521  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6522  %6 = bitcast <64 x i1> %5 to i64
6523  ret i64 %6
6524}
6525
; Masked memory-operand form: the right-hand side is loaded from %__b as
; <2 x i64>; the <4 x i1> icmp sgt result is ANDed with lanes 0-3 of %__u and
; zero-extended to <64 x i1> before being returned as i64. With AVX512VL the
; load is expected to fold into the write-masked vpcmpgtd; with AVX512F only,
; a vmovdqa load precedes a write-masked zmm compare and the
; kshiftlw/kshiftrw $12 pair.
6526define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6527; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6528; VLX:       # %bb.0: # %entry
6529; VLX-NEXT:    kmovd %edi, %k1
6530; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6531; VLX-NEXT:    kmovq %k0, %rax
6532; VLX-NEXT:    retq
6533;
6534; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6535; NoVLX:       # %bb.0: # %entry
6536; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6537; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6538; NoVLX-NEXT:    kmovw %edi, %k1
6539; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6540; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6541; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6542; NoVLX-NEXT:    kmovw %k0, %eax
6543; NoVLX-NEXT:    vzeroupper
6544; NoVLX-NEXT:    retq
6545entry:
6546  %0 = bitcast <2 x i64> %__a to <4 x i32>
6547  %load = load <2 x i64>, ptr %__b
6548  %1 = bitcast <2 x i64> %load to <4 x i32>
6549  %2 = icmp sgt <4 x i32> %0, %1
6550  %3 = bitcast i8 %__u to <8 x i1>
6551  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6552  %4 = and <4 x i1> %2, %extract.i
6553  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6554  %6 = bitcast <64 x i1> %5 to i64
6555  ret i64 %6
6556}
6557
6558
; Broadcast form: a single i32 is loaded from %__b, inserted into lane 0 and
; splatted to all four lanes before the icmp sgt. The <4 x i1> result is
; zero-extended to <64 x i1> (shuffle indices 4-7 pick zeroinitializer lanes)
; and returned as i64. Expected code uses an embedded-broadcast memory
; operand, {1to4} on xmm with AVX512VL and {1to16} on zmm with AVX512F only
; (the latter followed by the kshiftlw/kshiftrw $12 pair).
6559define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6560; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6561; VLX:       # %bb.0: # %entry
6562; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6563; VLX-NEXT:    kmovq %k0, %rax
6564; VLX-NEXT:    retq
6565;
6566; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6567; NoVLX:       # %bb.0: # %entry
6568; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6569; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6570; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6571; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6572; NoVLX-NEXT:    kmovw %k0, %eax
6573; NoVLX-NEXT:    vzeroupper
6574; NoVLX-NEXT:    retq
6575entry:
6576  %0 = bitcast <2 x i64> %__a to <4 x i32>
6577  %load = load i32, ptr %__b
6578  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6579  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6580  %2 = icmp sgt <4 x i32> %0, %1
6581  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6582  %4 = bitcast <64 x i1> %3 to i64
6583  ret i64 %4
6584}
6585
; Masked broadcast form: an i32 loaded from %__b is splatted to <4 x i32> and
; compared (icmp sgt) against %__a. The result is ANDed with lanes 0-3 of
; %__u; note the `and` operands are written mask-first here, unlike the
; non-broadcast masked tests in this group. The <4 x i1> value is then
; zero-extended to <64 x i1> and returned as i64. Expected code is a
; write-masked vpcmpgtd with an embedded-broadcast operand ({1to4} with
; AVX512VL, {1to16} plus kshiftlw/kshiftrw $12 with AVX512F only).
6586define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6587; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6588; VLX:       # %bb.0: # %entry
6589; VLX-NEXT:    kmovd %edi, %k1
6590; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6591; VLX-NEXT:    kmovq %k0, %rax
6592; VLX-NEXT:    retq
6593;
6594; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6595; NoVLX:       # %bb.0: # %entry
6596; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6597; NoVLX-NEXT:    kmovw %edi, %k1
6598; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6599; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6600; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6601; NoVLX-NEXT:    kmovw %k0, %eax
6602; NoVLX-NEXT:    vzeroupper
6603; NoVLX-NEXT:    retq
6604entry:
6605  %0 = bitcast <2 x i64> %__a to <4 x i32>
6606  %load = load i32, ptr %__b
6607  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6608  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6609  %2 = icmp sgt <4 x i32> %0, %1
6610  %3 = bitcast i8 %__u to <8 x i1>
6611  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6612  %4 = and <4 x i1> %extract.i, %2
6613  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6614  %6 = bitcast <64 x i1> %5 to i64
6615  ret i64 %6
6616}
6617
6618
; Register-register form on 256-bit inputs: icmp sgt on <8 x i32> (both
; operands bitcast from <4 x i64>). The <8 x i1> result is widened to
; <16 x i1> by a shuffle whose indices 8-15 select zeroinitializer lanes, then
; bitcast to an i16 bitmask. With AVX512VL the expected code is a ymm
; vpcmpgtd; with AVX512F only, the compare runs on zmm and a
; kshiftlw/kshiftrw $8 pair clears the 8 mask bits above the valid lanes.
6619define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6620; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6621; VLX:       # %bb.0: # %entry
6622; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
6623; VLX-NEXT:    kmovd %k0, %eax
6624; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6625; VLX-NEXT:    vzeroupper
6626; VLX-NEXT:    retq
6627;
6628; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6629; NoVLX:       # %bb.0: # %entry
6630; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6631; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6632; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6633; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6634; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6635; NoVLX-NEXT:    kmovw %k0, %eax
6636; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6637; NoVLX-NEXT:    vzeroupper
6638; NoVLX-NEXT:    retq
6639entry:
6640  %0 = bitcast <4 x i64> %__a to <8 x i32>
6641  %1 = bitcast <4 x i64> %__b to <8 x i32>
6642  %2 = icmp sgt <8 x i32> %0, %1
6643  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6644  %4 = bitcast <16 x i1> %3 to i16
6645  ret i16 %4
6646}
6647
; Memory-operand form on 256-bit inputs: the right-hand side is loaded from
; %__b as <4 x i64> and bitcast to <8 x i32> before the icmp sgt. The
; <8 x i1> result is zero-extended to <16 x i1> (shuffle indices 8-15 pick
; zeroinitializer lanes) and returned as i16. With AVX512VL the load is
; expected to fold into vpcmpgtd; with AVX512F only, a vmovdqa load feeds a
; zmm compare followed by the kshiftlw/kshiftrw $8 pair.
6648define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6649; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6650; VLX:       # %bb.0: # %entry
6651; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
6652; VLX-NEXT:    kmovd %k0, %eax
6653; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6654; VLX-NEXT:    vzeroupper
6655; VLX-NEXT:    retq
6656;
6657; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6658; NoVLX:       # %bb.0: # %entry
6659; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6660; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
6661; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6662; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6663; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6664; NoVLX-NEXT:    kmovw %k0, %eax
6665; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6666; NoVLX-NEXT:    vzeroupper
6667; NoVLX-NEXT:    retq
6668entry:
6669  %0 = bitcast <4 x i64> %__a to <8 x i32>
6670  %load = load <4 x i64>, ptr %__b
6671  %1 = bitcast <4 x i64> %load to <8 x i32>
6672  %2 = icmp sgt <8 x i32> %0, %1
6673  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6674  %4 = bitcast <16 x i1> %3 to i16
6675  ret i16 %4
6676}
6677
; Masked register-register form on 256-bit inputs: the <8 x i1> result of
; icmp sgt is ANDed with %__u (i8 bitcast to <8 x i1>), zero-extended to
; <16 x i1> (shuffle indices 8-15 pick zeroinitializer lanes) and returned as
; i16. Expected code moves %edi into %k1 and uses it as the write-mask of
; vpcmpgtd; the AVX512F-only path widens to zmm and clears the upper mask
; bits with kshiftlw/kshiftrw $8.
6678define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6679; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6680; VLX:       # %bb.0: # %entry
6681; VLX-NEXT:    kmovd %edi, %k1
6682; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6683; VLX-NEXT:    kmovd %k0, %eax
6684; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6685; VLX-NEXT:    vzeroupper
6686; VLX-NEXT:    retq
6687;
6688; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6689; NoVLX:       # %bb.0: # %entry
6690; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6691; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6692; NoVLX-NEXT:    kmovw %edi, %k1
6693; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6694; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6695; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6696; NoVLX-NEXT:    kmovw %k0, %eax
6697; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6698; NoVLX-NEXT:    vzeroupper
6699; NoVLX-NEXT:    retq
6700entry:
6701  %0 = bitcast <4 x i64> %__a to <8 x i32>
6702  %1 = bitcast <4 x i64> %__b to <8 x i32>
6703  %2 = icmp sgt <8 x i32> %0, %1
6704  %3 = bitcast i8 %__u to <8 x i1>
6705  %4 = and <8 x i1> %2, %3
6706  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6707  %6 = bitcast <16 x i1> %5 to i16
6708  ret i16 %6
6709}
6710
; Masked memory-operand form on 256-bit inputs: the right-hand side is loaded
; from %__b as <4 x i64>; the <8 x i1> icmp sgt result is ANDed with %__u
; (i8 bitcast to <8 x i1>), zero-extended to <16 x i1> and returned as i16.
; With AVX512VL the load is expected to fold into the write-masked vpcmpgtd;
; with AVX512F only, a vmovdqa load precedes a write-masked zmm compare and
; the kshiftlw/kshiftrw $8 pair.
6711define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6712; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6713; VLX:       # %bb.0: # %entry
6714; VLX-NEXT:    kmovd %edi, %k1
6715; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6716; VLX-NEXT:    kmovd %k0, %eax
6717; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6718; VLX-NEXT:    vzeroupper
6719; VLX-NEXT:    retq
6720;
6721; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6722; NoVLX:       # %bb.0: # %entry
6723; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6724; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
6725; NoVLX-NEXT:    kmovw %edi, %k1
6726; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6727; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6728; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6729; NoVLX-NEXT:    kmovw %k0, %eax
6730; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6731; NoVLX-NEXT:    vzeroupper
6732; NoVLX-NEXT:    retq
6733entry:
6734  %0 = bitcast <4 x i64> %__a to <8 x i32>
6735  %load = load <4 x i64>, ptr %__b
6736  %1 = bitcast <4 x i64> %load to <8 x i32>
6737  %2 = icmp sgt <8 x i32> %0, %1
6738  %3 = bitcast i8 %__u to <8 x i1>
6739  %4 = and <8 x i1> %2, %3
6740  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6741  %6 = bitcast <16 x i1> %5 to i16
6742  ret i16 %6
6743}
6744
6745
; Broadcast form on 256-bit inputs: a single i32 is loaded from %__b,
; inserted into lane 0 and splatted to all eight lanes before the icmp sgt.
; The <8 x i1> result is zero-extended to <16 x i1> (shuffle indices 8-15
; pick zeroinitializer lanes) and returned as i16. Expected code uses an
; embedded-broadcast memory operand, {1to8} on ymm with AVX512VL and {1to16}
; on zmm with AVX512F only (the latter followed by kshiftlw/kshiftrw $8).
6746define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6747; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6748; VLX:       # %bb.0: # %entry
6749; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6750; VLX-NEXT:    kmovd %k0, %eax
6751; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6752; VLX-NEXT:    vzeroupper
6753; VLX-NEXT:    retq
6754;
6755; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6756; NoVLX:       # %bb.0: # %entry
6757; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6758; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6759; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6760; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6761; NoVLX-NEXT:    kmovw %k0, %eax
6762; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6763; NoVLX-NEXT:    vzeroupper
6764; NoVLX-NEXT:    retq
6765entry:
6766  %0 = bitcast <4 x i64> %__a to <8 x i32>
6767  %load = load i32, ptr %__b
6768  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6769  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6770  %2 = icmp sgt <8 x i32> %0, %1
6771  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6772  %4 = bitcast <16 x i1> %3 to i16
6773  ret i16 %4
6774}
6775
; Masked broadcast form on 256-bit inputs: an i32 loaded from %__b is
; splatted to <8 x i32> and compared (icmp sgt) against %__a. The result is
; ANDed with %__u (i8 bitcast to <8 x i1>); note the `and` operands are
; written mask-first here. The <8 x i1> value is zero-extended to <16 x i1>
; and returned as i16. Expected code is a write-masked vpcmpgtd with an
; embedded-broadcast operand ({1to8} with AVX512VL, {1to16} plus
; kshiftlw/kshiftrw $8 with AVX512F only).
6776define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6777; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6778; VLX:       # %bb.0: # %entry
6779; VLX-NEXT:    kmovd %edi, %k1
6780; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6781; VLX-NEXT:    kmovd %k0, %eax
6782; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6783; VLX-NEXT:    vzeroupper
6784; VLX-NEXT:    retq
6785;
6786; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6787; NoVLX:       # %bb.0: # %entry
6788; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6789; NoVLX-NEXT:    kmovw %edi, %k1
6790; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6791; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6792; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6793; NoVLX-NEXT:    kmovw %k0, %eax
6794; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6795; NoVLX-NEXT:    vzeroupper
6796; NoVLX-NEXT:    retq
6797entry:
6798  %0 = bitcast <4 x i64> %__a to <8 x i32>
6799  %load = load i32, ptr %__b
6800  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6801  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6802  %2 = icmp sgt <8 x i32> %0, %1
6803  %3 = bitcast i8 %__u to <8 x i1>
6804  %4 = and <8 x i1> %3, %2
6805  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6806  %6 = bitcast <16 x i1> %5 to i16
6807  ret i16 %6
6808}
6809
6810
; Register-register form on 256-bit inputs, 32-bit result: icmp sgt on
; <8 x i32> (bitcast from <4 x i64>). The <8 x i1> result is widened to
; <32 x i1> by a shuffle whose indices 8-15 select zeroinitializer lanes,
; then bitcast to an i32 bitmask. With AVX512VL the expected code is a ymm
; vpcmpgtd; with AVX512F only, the compare runs on zmm and a
; kshiftlw/kshiftrw $8 pair clears the mask bits above the 8 valid lanes.
6811define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6812; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6813; VLX:       # %bb.0: # %entry
6814; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
6815; VLX-NEXT:    kmovd %k0, %eax
6816; VLX-NEXT:    vzeroupper
6817; VLX-NEXT:    retq
6818;
6819; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6820; NoVLX:       # %bb.0: # %entry
6821; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6822; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6823; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6824; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6825; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6826; NoVLX-NEXT:    kmovw %k0, %eax
6827; NoVLX-NEXT:    vzeroupper
6828; NoVLX-NEXT:    retq
6829entry:
6830  %0 = bitcast <4 x i64> %__a to <8 x i32>
6831  %1 = bitcast <4 x i64> %__b to <8 x i32>
6832  %2 = icmp sgt <8 x i32> %0, %1
6833  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6834  %4 = bitcast <32 x i1> %3 to i32
6835  ret i32 %4
6836}
6837
; Memory-operand form on 256-bit inputs, 32-bit result: the right-hand side
; is loaded from %__b as <4 x i64> and bitcast to <8 x i32> before the
; icmp sgt. The <8 x i1> result is zero-extended to <32 x i1> (shuffle
; indices 8-15 pick zeroinitializer lanes) and returned as i32. With AVX512VL
; the load is expected to fold into vpcmpgtd; with AVX512F only, a vmovdqa
; load feeds a zmm compare followed by the kshiftlw/kshiftrw $8 pair.
6838define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6839; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6840; VLX:       # %bb.0: # %entry
6841; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
6842; VLX-NEXT:    kmovd %k0, %eax
6843; VLX-NEXT:    vzeroupper
6844; VLX-NEXT:    retq
6845;
6846; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6847; NoVLX:       # %bb.0: # %entry
6848; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6849; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
6850; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6851; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6852; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6853; NoVLX-NEXT:    kmovw %k0, %eax
6854; NoVLX-NEXT:    vzeroupper
6855; NoVLX-NEXT:    retq
6856entry:
6857  %0 = bitcast <4 x i64> %__a to <8 x i32>
6858  %load = load <4 x i64>, ptr %__b
6859  %1 = bitcast <4 x i64> %load to <8 x i32>
6860  %2 = icmp sgt <8 x i32> %0, %1
6861  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6862  %4 = bitcast <32 x i1> %3 to i32
6863  ret i32 %4
6864}
6865
; Masked register-register form on 256-bit inputs, 32-bit result: the
; <8 x i1> result of icmp sgt is ANDed with %__u (i8 bitcast to <8 x i1>),
; zero-extended to <32 x i1> (shuffle indices 8-15 pick zeroinitializer
; lanes) and returned as i32. Expected code moves %edi into %k1 and uses it
; as the write-mask of vpcmpgtd; the AVX512F-only path widens to zmm and
; clears the upper mask bits with kshiftlw/kshiftrw $8.
6866define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6867; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6868; VLX:       # %bb.0: # %entry
6869; VLX-NEXT:    kmovd %edi, %k1
6870; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6871; VLX-NEXT:    kmovd %k0, %eax
6872; VLX-NEXT:    vzeroupper
6873; VLX-NEXT:    retq
6874;
6875; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6876; NoVLX:       # %bb.0: # %entry
6877; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6878; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6879; NoVLX-NEXT:    kmovw %edi, %k1
6880; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6881; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6882; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6883; NoVLX-NEXT:    kmovw %k0, %eax
6884; NoVLX-NEXT:    vzeroupper
6885; NoVLX-NEXT:    retq
6886entry:
6887  %0 = bitcast <4 x i64> %__a to <8 x i32>
6888  %1 = bitcast <4 x i64> %__b to <8 x i32>
6889  %2 = icmp sgt <8 x i32> %0, %1
6890  %3 = bitcast i8 %__u to <8 x i1>
6891  %4 = and <8 x i1> %2, %3
6892  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6893  %6 = bitcast <32 x i1> %5 to i32
6894  ret i32 %6
6895}
6896
; Masked memory-operand form on 256-bit inputs, 32-bit result: the right-hand
; side is loaded from %__b as <4 x i64>; the <8 x i1> icmp sgt result is
; ANDed with %__u (i8 bitcast to <8 x i1>), zero-extended to <32 x i1> and
; returned as i32. With AVX512VL the load is expected to fold into the
; write-masked vpcmpgtd; with AVX512F only, a vmovdqa load precedes a
; write-masked zmm compare and the kshiftlw/kshiftrw $8 pair.
6897define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6898; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6899; VLX:       # %bb.0: # %entry
6900; VLX-NEXT:    kmovd %edi, %k1
6901; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6902; VLX-NEXT:    kmovd %k0, %eax
6903; VLX-NEXT:    vzeroupper
6904; VLX-NEXT:    retq
6905;
6906; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6907; NoVLX:       # %bb.0: # %entry
6908; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6909; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
6910; NoVLX-NEXT:    kmovw %edi, %k1
6911; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6912; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6913; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6914; NoVLX-NEXT:    kmovw %k0, %eax
6915; NoVLX-NEXT:    vzeroupper
6916; NoVLX-NEXT:    retq
6917entry:
6918  %0 = bitcast <4 x i64> %__a to <8 x i32>
6919  %load = load <4 x i64>, ptr %__b
6920  %1 = bitcast <4 x i64> %load to <8 x i32>
6921  %2 = icmp sgt <8 x i32> %0, %1
6922  %3 = bitcast i8 %__u to <8 x i1>
6923  %4 = and <8 x i1> %2, %3
6924  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6925  %6 = bitcast <32 x i1> %5 to i32
6926  ret i32 %6
6927}
6928
6929
; Broadcast form on 256-bit inputs, 32-bit result: a single i32 is loaded
; from %__b, inserted into lane 0 and splatted to all eight lanes before the
; icmp sgt. The <8 x i1> result is zero-extended to <32 x i1> (shuffle
; indices 8-15 pick zeroinitializer lanes) and returned as i32. Expected code
; uses an embedded-broadcast memory operand, {1to8} on ymm with AVX512VL and
; {1to16} on zmm with AVX512F only (followed by kshiftlw/kshiftrw $8).
6930define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6931; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6932; VLX:       # %bb.0: # %entry
6933; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6934; VLX-NEXT:    kmovd %k0, %eax
6935; VLX-NEXT:    vzeroupper
6936; VLX-NEXT:    retq
6937;
6938; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6939; NoVLX:       # %bb.0: # %entry
6940; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6941; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6942; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6943; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6944; NoVLX-NEXT:    kmovw %k0, %eax
6945; NoVLX-NEXT:    vzeroupper
6946; NoVLX-NEXT:    retq
6947entry:
6948  %0 = bitcast <4 x i64> %__a to <8 x i32>
6949  %load = load i32, ptr %__b
6950  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6951  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6952  %2 = icmp sgt <8 x i32> %0, %1
6953  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6954  %4 = bitcast <32 x i1> %3 to i32
6955  ret i32 %4
6956}
6957
; Masked broadcast form on 256-bit inputs, 32-bit result: an i32 loaded from
; %__b is splatted to <8 x i32> and compared (icmp sgt) against %__a. The
; result is ANDed with %__u (i8 bitcast to <8 x i1>); note the `and`
; operands are written mask-first here. The <8 x i1> value is zero-extended
; to <32 x i1> and returned as i32. Expected code is a write-masked vpcmpgtd
; with an embedded-broadcast operand ({1to8} with AVX512VL, {1to16} plus
; kshiftlw/kshiftrw $8 with AVX512F only).
6958define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6959; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6960; VLX:       # %bb.0: # %entry
6961; VLX-NEXT:    kmovd %edi, %k1
6962; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6963; VLX-NEXT:    kmovd %k0, %eax
6964; VLX-NEXT:    vzeroupper
6965; VLX-NEXT:    retq
6966;
6967; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6968; NoVLX:       # %bb.0: # %entry
6969; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6970; NoVLX-NEXT:    kmovw %edi, %k1
6971; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6972; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6973; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6974; NoVLX-NEXT:    kmovw %k0, %eax
6975; NoVLX-NEXT:    vzeroupper
6976; NoVLX-NEXT:    retq
6977entry:
6978  %0 = bitcast <4 x i64> %__a to <8 x i32>
6979  %load = load i32, ptr %__b
6980  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6981  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6982  %2 = icmp sgt <8 x i32> %0, %1
6983  %3 = bitcast i8 %__u to <8 x i1>
6984  %4 = and <8 x i1> %3, %2
6985  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6986  %6 = bitcast <32 x i1> %5 to i32
6987  ret i32 %6
6988}
6989
6990
; Register-register form on 256-bit inputs, 64-bit result: icmp sgt on
; <8 x i32> (bitcast from <4 x i64>). The <8 x i1> result is widened to
; <64 x i1> by a shuffle whose indices 8-15 select zeroinitializer lanes,
; then bitcast to an i64 bitmask. With AVX512VL the expected code is a ymm
; vpcmpgtd read back with kmovq; with AVX512F only, the compare runs on zmm
; and a kshiftlw/kshiftrw $8 pair clears the mask bits above the 8 valid
; lanes before kmovw.
6991define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6992; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
6993; VLX:       # %bb.0: # %entry
6994; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
6995; VLX-NEXT:    kmovq %k0, %rax
6996; VLX-NEXT:    vzeroupper
6997; VLX-NEXT:    retq
6998;
6999; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7000; NoVLX:       # %bb.0: # %entry
7001; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
7002; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7003; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7004; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7005; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7006; NoVLX-NEXT:    kmovw %k0, %eax
7007; NoVLX-NEXT:    vzeroupper
7008; NoVLX-NEXT:    retq
7009entry:
7010  %0 = bitcast <4 x i64> %__a to <8 x i32>
7011  %1 = bitcast <4 x i64> %__b to <8 x i32>
7012  %2 = icmp sgt <8 x i32> %0, %1
7013  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7014  %4 = bitcast <64 x i1> %3 to i64
7015  ret i64 %4
7016}
7017
; Memory-operand form on 256-bit inputs, 64-bit result: the right-hand side
; is loaded from %__b as <4 x i64> and bitcast to <8 x i32> before the
; icmp sgt. The <8 x i1> result is zero-extended to <64 x i1> (shuffle
; indices 8-15 pick zeroinitializer lanes) and returned as i64. With AVX512VL
; the load is expected to fold into vpcmpgtd; with AVX512F only, a vmovdqa
; load feeds a zmm compare followed by the kshiftlw/kshiftrw $8 pair.
7018define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
7019; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7020; VLX:       # %bb.0: # %entry
7021; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
7022; VLX-NEXT:    kmovq %k0, %rax
7023; VLX-NEXT:    vzeroupper
7024; VLX-NEXT:    retq
7025;
7026; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7027; NoVLX:       # %bb.0: # %entry
7028; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7029; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
7030; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7031; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7032; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7033; NoVLX-NEXT:    kmovw %k0, %eax
7034; NoVLX-NEXT:    vzeroupper
7035; NoVLX-NEXT:    retq
7036entry:
7037  %0 = bitcast <4 x i64> %__a to <8 x i32>
7038  %load = load <4 x i64>, ptr %__b
7039  %1 = bitcast <4 x i64> %load to <8 x i32>
7040  %2 = icmp sgt <8 x i32> %0, %1
7041  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7042  %4 = bitcast <64 x i1> %3 to i64
7043  ret i64 %4
7044}
7045
; Masked register-register form on 256-bit inputs, 64-bit result: the
; <8 x i1> result of icmp sgt is ANDed with %__u (i8 bitcast to <8 x i1>),
; zero-extended to <64 x i1> (shuffle indices 8-15 pick zeroinitializer
; lanes) and returned as i64. Expected code moves %edi into %k1 and uses it
; as the write-mask of vpcmpgtd; the AVX512F-only path widens to zmm and
; clears the upper mask bits with kshiftlw/kshiftrw $8.
7046define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7047; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7048; VLX:       # %bb.0: # %entry
7049; VLX-NEXT:    kmovd %edi, %k1
7050; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7051; VLX-NEXT:    kmovq %k0, %rax
7052; VLX-NEXT:    vzeroupper
7053; VLX-NEXT:    retq
7054;
7055; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7056; NoVLX:       # %bb.0: # %entry
7057; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
7058; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7059; NoVLX-NEXT:    kmovw %edi, %k1
7060; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7061; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7062; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7063; NoVLX-NEXT:    kmovw %k0, %eax
7064; NoVLX-NEXT:    vzeroupper
7065; NoVLX-NEXT:    retq
7066entry:
7067  %0 = bitcast <4 x i64> %__a to <8 x i32>
7068  %1 = bitcast <4 x i64> %__b to <8 x i32>
7069  %2 = icmp sgt <8 x i32> %0, %1
7070  %3 = bitcast i8 %__u to <8 x i1>
7071  %4 = and <8 x i1> %2, %3
7072  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7073  %6 = bitcast <64 x i1> %5 to i64
7074  ret i64 %6
7075}
7076
; Masked memory-operand form on 256-bit inputs, 64-bit result: the right-hand
; side is loaded from %__b as <4 x i64>; the <8 x i1> icmp sgt result is
; ANDed with %__u (i8 bitcast to <8 x i1>), zero-extended to <64 x i1> and
; returned as i64. With AVX512VL the load is expected to fold into the
; write-masked vpcmpgtd; with AVX512F only, a vmovdqa load precedes a
; write-masked zmm compare and the kshiftlw/kshiftrw $8 pair.
7077define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
7078; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7079; VLX:       # %bb.0: # %entry
7080; VLX-NEXT:    kmovd %edi, %k1
7081; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7082; VLX-NEXT:    kmovq %k0, %rax
7083; VLX-NEXT:    vzeroupper
7084; VLX-NEXT:    retq
7085;
7086; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7087; NoVLX:       # %bb.0: # %entry
7088; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7089; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
7090; NoVLX-NEXT:    kmovw %edi, %k1
7091; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7092; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7093; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7094; NoVLX-NEXT:    kmovw %k0, %eax
7095; NoVLX-NEXT:    vzeroupper
7096; NoVLX-NEXT:    retq
7097entry:
7098  %0 = bitcast <4 x i64> %__a to <8 x i32>
7099  %load = load <4 x i64>, ptr %__b
7100  %1 = bitcast <4 x i64> %load to <8 x i32>
7101  %2 = icmp sgt <8 x i32> %0, %1
7102  %3 = bitcast i8 %__u to <8 x i1>
7103  %4 = and <8 x i1> %2, %3
7104  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7105  %6 = bitcast <64 x i1> %5 to i64
7106  ret i64 %6
7107}
7108
7109
; Unmasked signed greater-than over 8 x i32 lanes against a broadcast operand:
; one i32 is loaded from %__b and splatted via insertelement + shufflevector.
; The 8-bit result is padded with zeroinitializer lanes to 64 bits. The checks
; expect the load to fold into a {1to8} embedded broadcast with AVX512VL and
; a {1to16} broadcast on the widened zmm compare without it.
7110define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
7111; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7112; VLX:       # %bb.0: # %entry
7113; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7114; VLX-NEXT:    kmovq %k0, %rax
7115; VLX-NEXT:    vzeroupper
7116; VLX-NEXT:    retq
7117;
7118; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7119; NoVLX:       # %bb.0: # %entry
7120; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7121; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7122; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7123; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7124; NoVLX-NEXT:    kmovw %k0, %eax
7125; NoVLX-NEXT:    vzeroupper
7126; NoVLX-NEXT:    retq
7127entry:
7128  %0 = bitcast <4 x i64> %__a to <8 x i32>
7129  %load = load i32, ptr %__b
7130  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7131  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7132  %2 = icmp sgt <8 x i32> %0, %1
7133  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7134  %4 = bitcast <64 x i1> %3 to i64
7135  ret i64 %4
7136}
7137
; Masked variant of the 8 x i32 broadcast compare: the splat of the i32 loaded
; from %__b is compared (sgt) with %__a, ANDed with the 8 bits of %__u (mask is
; the first AND operand here), then padded with zero lanes to 64 bits. Both
; check sets expect the mask to be applied through {%k1} on the compare.
7138define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
7139; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7140; VLX:       # %bb.0: # %entry
7141; VLX-NEXT:    kmovd %edi, %k1
7142; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7143; VLX-NEXT:    kmovq %k0, %rax
7144; VLX-NEXT:    vzeroupper
7145; VLX-NEXT:    retq
7146;
7147; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7148; NoVLX:       # %bb.0: # %entry
7149; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7150; NoVLX-NEXT:    kmovw %edi, %k1
7151; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7152; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7153; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7154; NoVLX-NEXT:    kmovw %k0, %eax
7155; NoVLX-NEXT:    vzeroupper
7156; NoVLX-NEXT:    retq
7157entry:
7158  %0 = bitcast <4 x i64> %__a to <8 x i32>
7159  %load = load i32, ptr %__b
7160  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7161  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7162  %2 = icmp sgt <8 x i32> %0, %1
7163  %3 = bitcast i8 %__u to <8 x i1>
7164  %4 = and <8 x i1> %3, %2
7165  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7166  %6 = bitcast <64 x i1> %5 to i64
7167  ret i64 %6
7168}
7169
7170
; Unmasked signed greater-than over 16 x i32 lanes with both operands in
; registers. The 16-bit result is extended to 32 bits by a shuffle whose
; indices 16-31 select lanes of the zeroinitializer operand. The operation is
; already 512 bits wide, so both check sets expect one zmm vpcmpgtd.
7171define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7172; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7173; VLX:       # %bb.0: # %entry
7174; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7175; VLX-NEXT:    kmovd %k0, %eax
7176; VLX-NEXT:    vzeroupper
7177; VLX-NEXT:    retq
7178;
7179; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7180; NoVLX:       # %bb.0: # %entry
7181; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7182; NoVLX-NEXT:    kmovw %k0, %eax
7183; NoVLX-NEXT:    vzeroupper
7184; NoVLX-NEXT:    retq
7185entry:
7186  %0 = bitcast <8 x i64> %__a to <16 x i32>
7187  %1 = bitcast <8 x i64> %__b to <16 x i32>
7188  %2 = icmp sgt <16 x i32> %0, %1
7189  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7190  %4 = bitcast <32 x i1> %3 to i32
7191  ret i32 %4
7192}
7193
; Unmasked signed greater-than over 16 x i32 lanes where the second operand is
; a full <8 x i64> vector loaded from %__b. The 16-bit result is zero-padded
; to 32 bits by the shuffle. Both check sets expect the load folded into the
; compare as a memory operand.
7194define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7195; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7196; VLX:       # %bb.0: # %entry
7197; VLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7198; VLX-NEXT:    kmovd %k0, %eax
7199; VLX-NEXT:    vzeroupper
7200; VLX-NEXT:    retq
7201;
7202; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7203; NoVLX:       # %bb.0: # %entry
7204; NoVLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7205; NoVLX-NEXT:    kmovw %k0, %eax
7206; NoVLX-NEXT:    vzeroupper
7207; NoVLX-NEXT:    retq
7208entry:
7209  %0 = bitcast <8 x i64> %__a to <16 x i32>
7210  %load = load <8 x i64>, ptr %__b
7211  %1 = bitcast <8 x i64> %load to <16 x i32>
7212  %2 = icmp sgt <16 x i32> %0, %1
7213  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7214  %4 = bitcast <32 x i1> %3 to i32
7215  ret i32 %4
7216}
7217
; Masked signed greater-than over 16 x i32 register operands: the compare
; result is ANDed with the 16 bits of %__u and zero-padded to 32 bits. The
; AVX512VL checks apply the mask through {%k1}; the AVX512F-only checks expect
; an unmasked compare followed by `andl %edi, %eax` in a general register.
7218define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7219; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7220; VLX:       # %bb.0: # %entry
7221; VLX-NEXT:    kmovd %edi, %k1
7222; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7223; VLX-NEXT:    kmovd %k0, %eax
7224; VLX-NEXT:    vzeroupper
7225; VLX-NEXT:    retq
7226;
7227; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7228; NoVLX:       # %bb.0: # %entry
7229; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7230; NoVLX-NEXT:    kmovw %k0, %eax
7231; NoVLX-NEXT:    andl %edi, %eax
7232; NoVLX-NEXT:    vzeroupper
7233; NoVLX-NEXT:    retq
7234entry:
7235  %0 = bitcast <8 x i64> %__a to <16 x i32>
7236  %1 = bitcast <8 x i64> %__b to <16 x i32>
7237  %2 = icmp sgt <16 x i32> %0, %1
7238  %3 = bitcast i16 %__u to <16 x i1>
7239  %4 = and <16 x i1> %2, %3
7240  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7241  %6 = bitcast <32 x i1> %5 to i32
7242  ret i32 %6
7243}
7244
; Masked signed greater-than over 16 x i32 lanes with the second operand
; loaded as a full vector from %__b. The result is ANDed with the 16 bits of
; %__u and zero-padded to 32 bits. The AVX512VL checks use a {%k1}-masked
; compare with a memory operand; the AVX512F-only checks apply the mask with
; `andl %edi, %eax` after an unmasked compare.
7245define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7246; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7247; VLX:       # %bb.0: # %entry
7248; VLX-NEXT:    kmovd %edi, %k1
7249; VLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7250; VLX-NEXT:    kmovd %k0, %eax
7251; VLX-NEXT:    vzeroupper
7252; VLX-NEXT:    retq
7253;
7254; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7255; NoVLX:       # %bb.0: # %entry
7256; NoVLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0
7257; NoVLX-NEXT:    kmovw %k0, %eax
7258; NoVLX-NEXT:    andl %edi, %eax
7259; NoVLX-NEXT:    vzeroupper
7260; NoVLX-NEXT:    retq
7261entry:
7262  %0 = bitcast <8 x i64> %__a to <16 x i32>
7263  %load = load <8 x i64>, ptr %__b
7264  %1 = bitcast <8 x i64> %load to <16 x i32>
7265  %2 = icmp sgt <16 x i32> %0, %1
7266  %3 = bitcast i16 %__u to <16 x i1>
7267  %4 = and <16 x i1> %2, %3
7268  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7269  %6 = bitcast <32 x i1> %5 to i32
7270  ret i32 %6
7271}
7272
7273
; Unmasked signed greater-than over 16 x i32 lanes against a broadcast
; operand: one i32 is loaded from %__b and splatted to all 16 lanes. The
; 16-bit result is zero-padded to 32 bits. Both check sets expect the splat
; to fold into a {1to16} embedded broadcast.
7274define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7275; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7276; VLX:       # %bb.0: # %entry
7277; VLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7278; VLX-NEXT:    kmovd %k0, %eax
7279; VLX-NEXT:    vzeroupper
7280; VLX-NEXT:    retq
7281;
7282; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7283; NoVLX:       # %bb.0: # %entry
7284; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7285; NoVLX-NEXT:    kmovw %k0, %eax
7286; NoVLX-NEXT:    vzeroupper
7287; NoVLX-NEXT:    retq
7288entry:
7289  %0 = bitcast <8 x i64> %__a to <16 x i32>
7290  %load = load i32, ptr %__b
7291  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7292  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7293  %2 = icmp sgt <16 x i32> %0, %1
7294  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7295  %4 = bitcast <32 x i1> %3 to i32
7296  ret i32 %4
7297}
7298
; Masked signed greater-than over 16 x i32 lanes against a splat of the i32
; loaded from %__b. The result is ANDed with the 16 bits of %__u (mask is the
; first AND operand here) and zero-padded to 32 bits. The AVX512VL checks
; mask the broadcast compare with {%k1}; the AVX512F-only checks mask the
; result with `andl %edi, %eax`.
7299define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7300; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7301; VLX:       # %bb.0: # %entry
7302; VLX-NEXT:    kmovd %edi, %k1
7303; VLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7304; VLX-NEXT:    kmovd %k0, %eax
7305; VLX-NEXT:    vzeroupper
7306; VLX-NEXT:    retq
7307;
7308; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7309; NoVLX:       # %bb.0: # %entry
7310; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7311; NoVLX-NEXT:    kmovw %k0, %eax
7312; NoVLX-NEXT:    andl %edi, %eax
7313; NoVLX-NEXT:    vzeroupper
7314; NoVLX-NEXT:    retq
7315entry:
7316  %0 = bitcast <8 x i64> %__a to <16 x i32>
7317  %load = load i32, ptr %__b
7318  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7319  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7320  %2 = icmp sgt <16 x i32> %0, %1
7321  %3 = bitcast i16 %__u to <16 x i1>
7322  %4 = and <16 x i1> %3, %2
7323  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7324  %6 = bitcast <32 x i1> %5 to i32
7325  ret i32 %6
7326}
7327
7328
; Unmasked signed greater-than over 16 x i32 register operands, with the
; 16-bit result extended to 64 bits. Shuffle indices 0-15 take the compare
; lanes; every index from 16 up selects a lane of the zeroinitializer operand.
; Both check sets expect a single zmm vpcmpgtd.
7329define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7330; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7331; VLX:       # %bb.0: # %entry
7332; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7333; VLX-NEXT:    kmovq %k0, %rax
7334; VLX-NEXT:    vzeroupper
7335; VLX-NEXT:    retq
7336;
7337; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7338; NoVLX:       # %bb.0: # %entry
7339; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7340; NoVLX-NEXT:    kmovw %k0, %eax
7341; NoVLX-NEXT:    vzeroupper
7342; NoVLX-NEXT:    retq
7343entry:
7344  %0 = bitcast <8 x i64> %__a to <16 x i32>
7345  %1 = bitcast <8 x i64> %__b to <16 x i32>
7346  %2 = icmp sgt <16 x i32> %0, %1
7347  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7348  %4 = bitcast <64 x i1> %3 to i64
7349  ret i64 %4
7350}
7351
; Unmasked signed greater-than over 16 x i32 lanes where the second operand is
; a full <8 x i64> vector loaded from %__b. The 16-bit result is zero-padded
; to 64 bits by the shuffle. Both check sets expect the load folded into the
; compare as a memory operand.
7352define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7353; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7354; VLX:       # %bb.0: # %entry
7355; VLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7356; VLX-NEXT:    kmovq %k0, %rax
7357; VLX-NEXT:    vzeroupper
7358; VLX-NEXT:    retq
7359;
7360; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7361; NoVLX:       # %bb.0: # %entry
7362; NoVLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7363; NoVLX-NEXT:    kmovw %k0, %eax
7364; NoVLX-NEXT:    vzeroupper
7365; NoVLX-NEXT:    retq
7366entry:
7367  %0 = bitcast <8 x i64> %__a to <16 x i32>
7368  %load = load <8 x i64>, ptr %__b
7369  %1 = bitcast <8 x i64> %load to <16 x i32>
7370  %2 = icmp sgt <16 x i32> %0, %1
7371  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7372  %4 = bitcast <64 x i1> %3 to i64
7373  ret i64 %4
7374}
7375
; Masked signed greater-than over 16 x i32 register operands: the compare
; result is ANDed with the 16 bits of %__u and zero-padded to 64 bits. The
; AVX512VL checks apply the mask through {%k1}; the AVX512F-only checks expect
; an unmasked compare followed by `andl %edi, %eax`.
7376define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7377; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7378; VLX:       # %bb.0: # %entry
7379; VLX-NEXT:    kmovd %edi, %k1
7380; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7381; VLX-NEXT:    kmovq %k0, %rax
7382; VLX-NEXT:    vzeroupper
7383; VLX-NEXT:    retq
7384;
7385; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7386; NoVLX:       # %bb.0: # %entry
7387; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7388; NoVLX-NEXT:    kmovw %k0, %eax
7389; NoVLX-NEXT:    andl %edi, %eax
7390; NoVLX-NEXT:    vzeroupper
7391; NoVLX-NEXT:    retq
7392entry:
7393  %0 = bitcast <8 x i64> %__a to <16 x i32>
7394  %1 = bitcast <8 x i64> %__b to <16 x i32>
7395  %2 = icmp sgt <16 x i32> %0, %1
7396  %3 = bitcast i16 %__u to <16 x i1>
7397  %4 = and <16 x i1> %2, %3
7398  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7399  %6 = bitcast <64 x i1> %5 to i64
7400  ret i64 %6
7401}
7402
; Masked signed greater-than over 16 x i32 lanes with the second operand
; loaded as a full vector from %__b. The result is ANDed with the 16 bits of
; %__u and zero-padded to 64 bits. The AVX512VL checks use a {%k1}-masked
; compare with a memory operand; the AVX512F-only checks apply the mask with
; `andl %edi, %eax` after an unmasked compare.
7403define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7404; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7405; VLX:       # %bb.0: # %entry
7406; VLX-NEXT:    kmovd %edi, %k1
7407; VLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7408; VLX-NEXT:    kmovq %k0, %rax
7409; VLX-NEXT:    vzeroupper
7410; VLX-NEXT:    retq
7411;
7412; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7413; NoVLX:       # %bb.0: # %entry
7414; NoVLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0
7415; NoVLX-NEXT:    kmovw %k0, %eax
7416; NoVLX-NEXT:    andl %edi, %eax
7417; NoVLX-NEXT:    vzeroupper
7418; NoVLX-NEXT:    retq
7419entry:
7420  %0 = bitcast <8 x i64> %__a to <16 x i32>
7421  %load = load <8 x i64>, ptr %__b
7422  %1 = bitcast <8 x i64> %load to <16 x i32>
7423  %2 = icmp sgt <16 x i32> %0, %1
7424  %3 = bitcast i16 %__u to <16 x i1>
7425  %4 = and <16 x i1> %2, %3
7426  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7427  %6 = bitcast <64 x i1> %5 to i64
7428  ret i64 %6
7429}
7430
7431
; Unmasked signed greater-than over 16 x i32 lanes against a broadcast
; operand: one i32 is loaded from %__b and splatted to all 16 lanes. The
; 16-bit result is zero-padded to 64 bits. Both check sets expect the splat
; to fold into a {1to16} embedded broadcast.
7432define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7433; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7434; VLX:       # %bb.0: # %entry
7435; VLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7436; VLX-NEXT:    kmovq %k0, %rax
7437; VLX-NEXT:    vzeroupper
7438; VLX-NEXT:    retq
7439;
7440; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7441; NoVLX:       # %bb.0: # %entry
7442; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7443; NoVLX-NEXT:    kmovw %k0, %eax
7444; NoVLX-NEXT:    vzeroupper
7445; NoVLX-NEXT:    retq
7446entry:
7447  %0 = bitcast <8 x i64> %__a to <16 x i32>
7448  %load = load i32, ptr %__b
7449  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7450  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7451  %2 = icmp sgt <16 x i32> %0, %1
7452  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7453  %4 = bitcast <64 x i1> %3 to i64
7454  ret i64 %4
7455}
7456
; Masked signed greater-than over 16 x i32 lanes against a splat of the i32
; loaded from %__b. The result is ANDed with the 16 bits of %__u (mask is the
; first AND operand here) and zero-padded to 64 bits. The AVX512VL checks
; mask the broadcast compare with {%k1}; the AVX512F-only checks mask the
; result with `andl %edi, %eax`.
7457define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7458; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7459; VLX:       # %bb.0: # %entry
7460; VLX-NEXT:    kmovd %edi, %k1
7461; VLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7462; VLX-NEXT:    kmovq %k0, %rax
7463; VLX-NEXT:    vzeroupper
7464; VLX-NEXT:    retq
7465;
7466; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7467; NoVLX:       # %bb.0: # %entry
7468; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7469; NoVLX-NEXT:    kmovw %k0, %eax
7470; NoVLX-NEXT:    andl %edi, %eax
7471; NoVLX-NEXT:    vzeroupper
7472; NoVLX-NEXT:    retq
7473entry:
7474  %0 = bitcast <8 x i64> %__a to <16 x i32>
7475  %load = load i32, ptr %__b
7476  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7477  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7478  %2 = icmp sgt <16 x i32> %0, %1
7479  %3 = bitcast i16 %__u to <16 x i1>
7480  %4 = and <16 x i1> %3, %2
7481  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7482  %6 = bitcast <64 x i1> %5 to i64
7483  ret i64 %6
7484}
7485
7486
; Unmasked signed greater-than over 2 x i64 register operands, with the 2-bit
; result widened to 4 bits (shuffle indices 2 and 3 select lanes of the
; zeroinitializer operand) and returned as i4. The AVX512F-only checks widen
; the compare to zmm and keep the low two bits with `andl $3`.
7487define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7488; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7489; VLX:       # %bb.0: # %entry
7490; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
7491; VLX-NEXT:    kmovb %k0, %eax
7492; VLX-NEXT:    retq
7493;
7494; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7495; NoVLX:       # %bb.0: # %entry
7496; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7497; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7498; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7499; NoVLX-NEXT:    kmovw %k0, %eax
7500; NoVLX-NEXT:    andl $3, %eax
7501; NoVLX-NEXT:    vzeroupper
7502; NoVLX-NEXT:    retq
7503entry:
7504  %0 = bitcast <2 x i64> %__a to <2 x i64>
7505  %1 = bitcast <2 x i64> %__b to <2 x i64>
7506  %2 = icmp sgt <2 x i64> %0, %1
7507  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7508  %4 = bitcast <4 x i1> %3 to i4
7509  ret i4 %4
7510}
7511
; Unmasked signed greater-than over 2 x i64 lanes where the second operand is
; a full <2 x i64> vector loaded from %__b. The 2-bit result is zero-padded to
; 4 bits and returned as i4. With AVX512VL the load folds into the compare;
; the AVX512F-only checks load with vmovdqa and compare at zmm width.
7512define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7513; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7514; VLX:       # %bb.0: # %entry
7515; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
7516; VLX-NEXT:    kmovb %k0, %eax
7517; VLX-NEXT:    retq
7518;
7519; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7520; NoVLX:       # %bb.0: # %entry
7521; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7522; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
7523; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7524; NoVLX-NEXT:    kmovw %k0, %eax
7525; NoVLX-NEXT:    andl $3, %eax
7526; NoVLX-NEXT:    vzeroupper
7527; NoVLX-NEXT:    retq
7528entry:
7529  %0 = bitcast <2 x i64> %__a to <2 x i64>
7530  %load = load <2 x i64>, ptr %__b
7531  %1 = bitcast <2 x i64> %load to <2 x i64>
7532  %2 = icmp sgt <2 x i64> %0, %1
7533  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7534  %4 = bitcast <4 x i1> %3 to i4
7535  ret i4 %4
7536}
7537
; Masked signed greater-than over 2 x i64 register operands. The i8 mask %__u
; is bitcast to <8 x i1> and only lanes 0 and 1 are extracted (%extract.i) to
; AND with the compare result, which is then zero-padded to 4 bits and
; returned as i4. Both check sets apply the mask through {%k1}.
7538define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7539; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7540; VLX:       # %bb.0: # %entry
7541; VLX-NEXT:    kmovd %edi, %k1
7542; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7543; VLX-NEXT:    kmovb %k0, %eax
7544; VLX-NEXT:    retq
7545;
7546; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7547; NoVLX:       # %bb.0: # %entry
7548; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7549; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7550; NoVLX-NEXT:    kmovw %edi, %k1
7551; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7552; NoVLX-NEXT:    kmovw %k0, %eax
7553; NoVLX-NEXT:    andl $3, %eax
7554; NoVLX-NEXT:    vzeroupper
7555; NoVLX-NEXT:    retq
7556entry:
7557  %0 = bitcast <2 x i64> %__a to <2 x i64>
7558  %1 = bitcast <2 x i64> %__b to <2 x i64>
7559  %2 = icmp sgt <2 x i64> %0, %1
7560  %3 = bitcast i8 %__u to <8 x i1>
7561  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7562  %4 = and <2 x i1> %2, %extract.i
7563  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7564  %6 = bitcast <4 x i1> %5 to i4
7565  ret i4 %6
7566}
7567
; Masked signed greater-than over 2 x i64 lanes with the second operand loaded
; as a full vector from %__b. Lanes 0 and 1 of the bitcast i8 mask %__u are
; ANDed with the compare result, which is zero-padded to 4 bits and returned
; as i4. The AVX512F-only checks load via vmovdqa and compare at zmm width
; under {%k1}, then keep the low two bits with `andl $3`.
7568define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7569; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7570; VLX:       # %bb.0: # %entry
7571; VLX-NEXT:    kmovd %edi, %k1
7572; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7573; VLX-NEXT:    kmovb %k0, %eax
7574; VLX-NEXT:    retq
7575;
7576; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7577; NoVLX:       # %bb.0: # %entry
7578; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7579; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
7580; NoVLX-NEXT:    kmovw %edi, %k1
7581; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7582; NoVLX-NEXT:    kmovw %k0, %eax
7583; NoVLX-NEXT:    andl $3, %eax
7584; NoVLX-NEXT:    vzeroupper
7585; NoVLX-NEXT:    retq
7586entry:
7587  %0 = bitcast <2 x i64> %__a to <2 x i64>
7588  %load = load <2 x i64>, ptr %__b
7589  %1 = bitcast <2 x i64> %load to <2 x i64>
7590  %2 = icmp sgt <2 x i64> %0, %1
7591  %3 = bitcast i8 %__u to <8 x i1>
7592  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7593  %4 = and <2 x i1> %2, %extract.i
7594  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7595  %6 = bitcast <4 x i1> %5 to i4
7596  ret i4 %6
7597}
7598
7599
; Unmasked signed greater-than over 2 x i64 lanes against a broadcast operand:
; one i64 is loaded from %__b and splatted to both lanes. The 2-bit result is
; zero-padded to 4 bits and returned as i4. The checks expect a {1to2}
; embedded broadcast with AVX512VL and a {1to8} broadcast on the widened zmm
; compare without it.
7600define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7601; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7602; VLX:       # %bb.0: # %entry
7603; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7604; VLX-NEXT:    kmovb %k0, %eax
7605; VLX-NEXT:    retq
7606;
7607; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7608; NoVLX:       # %bb.0: # %entry
7609; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7610; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7611; NoVLX-NEXT:    kmovw %k0, %eax
7612; NoVLX-NEXT:    andl $3, %eax
7613; NoVLX-NEXT:    vzeroupper
7614; NoVLX-NEXT:    retq
7615entry:
7616  %0 = bitcast <2 x i64> %__a to <2 x i64>
7617  %load = load i64, ptr %__b
7618  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7619  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7620  %2 = icmp sgt <2 x i64> %0, %1
7621  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7622  %4 = bitcast <4 x i1> %3 to i4
7623  ret i4 %4
7624}
7625
; Masked signed greater-than over 2 x i64 lanes against a splat of the i64
; loaded from %__b. Lanes 0 and 1 of the bitcast i8 mask %__u are ANDed with
; the compare result (mask is the first AND operand here), which is
; zero-padded to 4 bits and returned as i4. Both check sets expect a
; broadcast compare under {%k1}.
7626define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7627; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7628; VLX:       # %bb.0: # %entry
7629; VLX-NEXT:    kmovd %edi, %k1
7630; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7631; VLX-NEXT:    kmovb %k0, %eax
7632; VLX-NEXT:    retq
7633;
7634; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7635; NoVLX:       # %bb.0: # %entry
7636; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7637; NoVLX-NEXT:    kmovw %edi, %k1
7638; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7639; NoVLX-NEXT:    kmovw %k0, %eax
7640; NoVLX-NEXT:    andl $3, %eax
7641; NoVLX-NEXT:    vzeroupper
7642; NoVLX-NEXT:    retq
7643entry:
7644  %0 = bitcast <2 x i64> %__a to <2 x i64>
7645  %load = load i64, ptr %__b
7646  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7647  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7648  %2 = icmp sgt <2 x i64> %0, %1
7649  %3 = bitcast i8 %__u to <8 x i1>
7650  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7651  %4 = and <2 x i1> %extract.i, %2
7652  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7653  %6 = bitcast <4 x i1> %5 to i4
7654  ret i4 %6
7655}
7656
7657
; Unmasked signed greater-than over 2 x i64 register operands, with the 2-bit
; result widened to 8 bits: shuffle indices 0 and 1 take the compare lanes,
; the rest (2 or 3) select lanes of the zeroinitializer operand. The
; AVX512F-only checks compare at zmm width and keep only the low two mask
; bits with a kshiftlw/kshiftrw $14 pair.
7658define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7659; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7660; VLX:       # %bb.0: # %entry
7661; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
7662; VLX-NEXT:    kmovd %k0, %eax
7663; VLX-NEXT:    # kill: def $al killed $al killed $eax
7664; VLX-NEXT:    retq
7665;
7666; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7667; NoVLX:       # %bb.0: # %entry
7668; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7669; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7670; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7671; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7672; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7673; NoVLX-NEXT:    kmovw %k0, %eax
7674; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7675; NoVLX-NEXT:    vzeroupper
7676; NoVLX-NEXT:    retq
7677entry:
7678  %0 = bitcast <2 x i64> %__a to <2 x i64>
7679  %1 = bitcast <2 x i64> %__b to <2 x i64>
7680  %2 = icmp sgt <2 x i64> %0, %1
7681  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7682  %4 = bitcast <8 x i1> %3 to i8
7683  ret i8 %4
7684}
7685
; Unmasked signed greater-than over 2 x i64 lanes where the second operand is
; a full <2 x i64> vector loaded from %__b. The 2-bit result is zero-padded
; to 8 bits and returned as i8. With AVX512VL the load folds into the compare;
; the AVX512F-only checks load via vmovdqa, compare at zmm width, and strip
; the upper mask bits with kshiftlw/kshiftrw $14.
7686define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7687; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7688; VLX:       # %bb.0: # %entry
7689; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
7690; VLX-NEXT:    kmovd %k0, %eax
7691; VLX-NEXT:    # kill: def $al killed $al killed $eax
7692; VLX-NEXT:    retq
7693;
7694; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7695; NoVLX:       # %bb.0: # %entry
7696; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7697; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
7698; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7699; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7700; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7701; NoVLX-NEXT:    kmovw %k0, %eax
7702; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7703; NoVLX-NEXT:    vzeroupper
7704; NoVLX-NEXT:    retq
7705entry:
7706  %0 = bitcast <2 x i64> %__a to <2 x i64>
7707  %load = load <2 x i64>, ptr %__b
7708  %1 = bitcast <2 x i64> %load to <2 x i64>
7709  %2 = icmp sgt <2 x i64> %0, %1
7710  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7711  %4 = bitcast <8 x i1> %3 to i8
7712  ret i8 %4
7713}
7714
; Masked signed greater-than over 2 x i64 register operands. Lanes 0 and 1 of
; the bitcast i8 mask %__u (%extract.i) are ANDed with the compare result,
; which is then zero-padded to 8 bits and returned as i8. Both check sets
; apply the mask through {%k1}; the AVX512F-only checks additionally strip
; the upper mask bits with kshiftlw/kshiftrw $14.
7715define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7716; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7717; VLX:       # %bb.0: # %entry
7718; VLX-NEXT:    kmovd %edi, %k1
7719; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7720; VLX-NEXT:    kmovd %k0, %eax
7721; VLX-NEXT:    # kill: def $al killed $al killed $eax
7722; VLX-NEXT:    retq
7723;
7724; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7725; NoVLX:       # %bb.0: # %entry
7726; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7727; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7728; NoVLX-NEXT:    kmovw %edi, %k1
7729; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7730; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7731; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7732; NoVLX-NEXT:    kmovw %k0, %eax
7733; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7734; NoVLX-NEXT:    vzeroupper
7735; NoVLX-NEXT:    retq
7736entry:
7737  %0 = bitcast <2 x i64> %__a to <2 x i64>
7738  %1 = bitcast <2 x i64> %__b to <2 x i64>
7739  %2 = icmp sgt <2 x i64> %0, %1
7740  %3 = bitcast i8 %__u to <8 x i1>
7741  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7742  %4 = and <2 x i1> %2, %extract.i
7743  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7744  %6 = bitcast <8 x i1> %5 to i8
7745  ret i8 %6
7746}
7747
; Masked signed greater-than over 2 x i64 lanes with the second operand loaded
; as a full vector from %__b. Lanes 0 and 1 of the bitcast i8 mask %__u are
; ANDed with the compare result, which is zero-padded to 8 bits and returned
; as i8. The AVX512F-only checks load via vmovdqa, compare at zmm width under
; {%k1}, and strip the upper mask bits with kshiftlw/kshiftrw $14.
7748define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7749; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7750; VLX:       # %bb.0: # %entry
7751; VLX-NEXT:    kmovd %edi, %k1
7752; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7753; VLX-NEXT:    kmovd %k0, %eax
7754; VLX-NEXT:    # kill: def $al killed $al killed $eax
7755; VLX-NEXT:    retq
7756;
7757; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7758; NoVLX:       # %bb.0: # %entry
7759; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7760; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
7761; NoVLX-NEXT:    kmovw %edi, %k1
7762; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7763; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7764; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7765; NoVLX-NEXT:    kmovw %k0, %eax
7766; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7767; NoVLX-NEXT:    vzeroupper
7768; NoVLX-NEXT:    retq
7769entry:
7770  %0 = bitcast <2 x i64> %__a to <2 x i64>
7771  %load = load <2 x i64>, ptr %__b
7772  %1 = bitcast <2 x i64> %load to <2 x i64>
7773  %2 = icmp sgt <2 x i64> %0, %1
7774  %3 = bitcast i8 %__u to <8 x i1>
7775  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7776  %4 = and <2 x i1> %2, %extract.i
7777  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7778  %6 = bitcast <8 x i1> %5 to i8
7779  ret i8 %6
7780}
7781
7782
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes, returned as an i8 bitmask.
; With AVX512VL the splat is expected to become a {1to2} embedded broadcast on
; the xmm vpcmpgtq. Without it the compare uses a {1to8} broadcast on zmm and
; a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
7783define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7784; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7785; VLX:       # %bb.0: # %entry
7786; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7787; VLX-NEXT:    kmovd %k0, %eax
7788; VLX-NEXT:    # kill: def $al killed $al killed $eax
7789; VLX-NEXT:    retq
7790;
7791; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7792; NoVLX:       # %bb.0: # %entry
7793; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7794; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7795; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7796; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7797; NoVLX-NEXT:    kmovw %k0, %eax
7798; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7799; NoVLX-NEXT:    vzeroupper
7800; NoVLX-NEXT:    retq
7801entry:
7802  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
7803  %load = load i64, ptr %__b
7804  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7805  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7806  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-7 are zero.
7807  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7808  %4 = bitcast <8 x i1> %3 to i8
7809  ret i8 %4
7810}
7811
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i8 bitmask.
; With AVX512VL the expected code is a masked xmm vpcmpgtq with a {1to2}
; embedded broadcast. Without it the compare uses a {1to8} broadcast on zmm
; and a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
7812define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7813; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7814; VLX:       # %bb.0: # %entry
7815; VLX-NEXT:    kmovd %edi, %k1
7816; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7817; VLX-NEXT:    kmovd %k0, %eax
7818; VLX-NEXT:    # kill: def $al killed $al killed $eax
7819; VLX-NEXT:    retq
7820;
7821; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7822; NoVLX:       # %bb.0: # %entry
7823; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7824; NoVLX-NEXT:    kmovw %edi, %k1
7825; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7826; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7827; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7828; NoVLX-NEXT:    kmovw %k0, %eax
7829; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
7830; NoVLX-NEXT:    vzeroupper
7831; NoVLX-NEXT:    retq
7832entry:
7833  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
7834  %load = load i64, ptr %__b
7835  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7836  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7837  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask; here the mask is the first 'and' operand.
7838  %3 = bitcast i8 %__u to <8 x i1>
7839  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7840  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-7 are zero.
7841  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7842  %6 = bitcast <8 x i1> %5 to i8
7843  ret i8 %6
7844}
7845
7846
; Unmasked signed greater-than compare of two <2 x i64> vectors, returned as
; an i16 bitmask: the <2 x i1> result is widened to <16 x i1> with zero upper
; lanes.
; With AVX512VL the expected code is a single xmm vpcmpgtq. Without it the
; operands are widened to zmm and a kshiftlw/kshiftrw $14 pair clears the
; mask bits above the low two.
7847define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7848; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7849; VLX:       # %bb.0: # %entry
7850; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
7851; VLX-NEXT:    kmovd %k0, %eax
7852; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
7853; VLX-NEXT:    retq
7854;
7855; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7856; NoVLX:       # %bb.0: # %entry
7857; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7858; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7859; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7860; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7861; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7862; NoVLX-NEXT:    kmovw %k0, %eax
7863; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
7864; NoVLX-NEXT:    vzeroupper
7865; NoVLX-NEXT:    retq
7866entry:
  ; The two bitcasts below have identical source and destination types (no-ops).
7867  %0 = bitcast <2 x i64> %__a to <2 x i64>
7868  %1 = bitcast <2 x i64> %__b to <2 x i64>
7869  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
7870  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7871  %4 = bitcast <16 x i1> %3 to i16
7872  ret i16 %4
7873}
7874
; Unmasked signed greater-than compare of %__a against a <2 x i64> loaded
; from %__b, returned as an i16 bitmask with zero upper lanes.
; With AVX512VL the load is expected to fold into the xmm vpcmpgtq. Without
; it the operand is loaded with vmovdqa, the compare runs on zmm, and a
; kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
7875define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7876; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7877; VLX:       # %bb.0: # %entry
7878; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
7879; VLX-NEXT:    kmovd %k0, %eax
7880; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
7881; VLX-NEXT:    retq
7882;
7883; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7884; NoVLX:       # %bb.0: # %entry
7885; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7886; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
7887; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7888; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7889; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7890; NoVLX-NEXT:    kmovw %k0, %eax
7891; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
7892; NoVLX-NEXT:    vzeroupper
7893; NoVLX-NEXT:    retq
7894entry:
  ; The bitcasts in this body have identical source and destination types (no-ops).
7895  %0 = bitcast <2 x i64> %__a to <2 x i64>
7896  %load = load <2 x i64>, ptr %__b
7897  %1 = bitcast <2 x i64> %load to <2 x i64>
7898  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
7899  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7900  %4 = bitcast <16 x i1> %3 to i16
7901  ret i16 %4
7902}
7903
; Masked signed greater-than compare of two <2 x i64> vectors, returned as an
; i16 bitmask. The <2 x i1> result is ANDed with the low two bits of %__u and
; widened to <16 x i1> with zero upper lanes.
; With AVX512VL the expected code is a single masked xmm vpcmpgtq. Without it
; the operands are widened to zmm and a kshiftlw/kshiftrw $14 pair clears the
; mask bits above the low two.
7904define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7905; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7906; VLX:       # %bb.0: # %entry
7907; VLX-NEXT:    kmovd %edi, %k1
7908; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7909; VLX-NEXT:    kmovd %k0, %eax
7910; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
7911; VLX-NEXT:    retq
7912;
7913; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7914; NoVLX:       # %bb.0: # %entry
7915; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7916; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7917; NoVLX-NEXT:    kmovw %edi, %k1
7918; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7919; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7920; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7921; NoVLX-NEXT:    kmovw %k0, %eax
7922; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
7923; NoVLX-NEXT:    vzeroupper
7924; NoVLX-NEXT:    retq
7925entry:
  ; The two bitcasts below have identical source and destination types (no-ops).
7926  %0 = bitcast <2 x i64> %__a to <2 x i64>
7927  %1 = bitcast <2 x i64> %__b to <2 x i64>
7928  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask so it lines up with the two compared lanes.
7929  %3 = bitcast i8 %__u to <8 x i1>
7930  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7931  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
7932  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7933  %6 = bitcast <16 x i1> %5 to i16
7934  ret i16 %6
7935}
7936
; Masked signed greater-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i16 bitmask with only the low two bits possibly set.
; With AVX512VL the load is expected to fold into the masked xmm vpcmpgtq.
; Without it the operand is loaded with vmovdqa, the compare runs on zmm, and
; a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
7937define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7938; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7939; VLX:       # %bb.0: # %entry
7940; VLX-NEXT:    kmovd %edi, %k1
7941; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7942; VLX-NEXT:    kmovd %k0, %eax
7943; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
7944; VLX-NEXT:    retq
7945;
7946; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7947; NoVLX:       # %bb.0: # %entry
7948; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7949; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
7950; NoVLX-NEXT:    kmovw %edi, %k1
7951; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7952; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7953; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7954; NoVLX-NEXT:    kmovw %k0, %eax
7955; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
7956; NoVLX-NEXT:    vzeroupper
7957; NoVLX-NEXT:    retq
7958entry:
  ; The bitcasts of %__a and %load have identical source and destination types (no-ops).
7959  %0 = bitcast <2 x i64> %__a to <2 x i64>
7960  %load = load <2 x i64>, ptr %__b
7961  %1 = bitcast <2 x i64> %load to <2 x i64>
7962  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask so it lines up with the two compared lanes.
7963  %3 = bitcast i8 %__u to <8 x i1>
7964  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7965  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
7966  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7967  %6 = bitcast <16 x i1> %5 to i16
7968  ret i16 %6
7969}
7970
7971
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes, returned as an i16 bitmask.
; With AVX512VL the splat is expected to become a {1to2} embedded broadcast on
; the xmm vpcmpgtq. Without it the compare uses a {1to8} broadcast on zmm and
; a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
7972define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7973; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7974; VLX:       # %bb.0: # %entry
7975; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7976; VLX-NEXT:    kmovd %k0, %eax
7977; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
7978; VLX-NEXT:    retq
7979;
7980; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7981; NoVLX:       # %bb.0: # %entry
7982; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7983; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7984; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7985; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7986; NoVLX-NEXT:    kmovw %k0, %eax
7987; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
7988; NoVLX-NEXT:    vzeroupper
7989; NoVLX-NEXT:    retq
7990entry:
7991  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
7992  %load = load i64, ptr %__b
7993  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7994  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7995  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
7996  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7997  %4 = bitcast <16 x i1> %3 to i16
7998  ret i16 %4
7999}
8000
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i16 bitmask.
; With AVX512VL the expected code is a masked xmm vpcmpgtq with a {1to2}
; embedded broadcast. Without it the compare uses a {1to8} broadcast on zmm
; and a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
8001define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8002; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8003; VLX:       # %bb.0: # %entry
8004; VLX-NEXT:    kmovd %edi, %k1
8005; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8006; VLX-NEXT:    kmovd %k0, %eax
8007; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8008; VLX-NEXT:    retq
8009;
8010; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8011; NoVLX:       # %bb.0: # %entry
8012; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8013; NoVLX-NEXT:    kmovw %edi, %k1
8014; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8015; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8016; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8017; NoVLX-NEXT:    kmovw %k0, %eax
8018; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8019; NoVLX-NEXT:    vzeroupper
8020; NoVLX-NEXT:    retq
8021entry:
8022  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
8023  %load = load i64, ptr %__b
8024  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8025  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8026  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask; here the mask is the first 'and' operand.
8027  %3 = bitcast i8 %__u to <8 x i1>
8028  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8029  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
8030  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8031  %6 = bitcast <16 x i1> %5 to i16
8032  ret i16 %6
8033}
8034
8035
; Unmasked signed greater-than compare of two <2 x i64> vectors, returned as
; an i32 bitmask: the <2 x i1> result is widened to <32 x i1> with zero upper
; lanes.
; With AVX512VL the expected code is a single xmm vpcmpgtq. Without it the
; operands are widened to zmm and a kshiftlw/kshiftrw $14 pair clears the
; mask bits above the low two before the kmovw into %eax.
8036define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8037; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8038; VLX:       # %bb.0: # %entry
8039; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
8040; VLX-NEXT:    kmovd %k0, %eax
8041; VLX-NEXT:    retq
8042;
8043; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8044; NoVLX:       # %bb.0: # %entry
8045; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8046; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8047; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8048; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8049; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8050; NoVLX-NEXT:    kmovw %k0, %eax
8051; NoVLX-NEXT:    vzeroupper
8052; NoVLX-NEXT:    retq
8053entry:
  ; The two bitcasts below have identical source and destination types (no-ops).
8054  %0 = bitcast <2 x i64> %__a to <2 x i64>
8055  %1 = bitcast <2 x i64> %__b to <2 x i64>
8056  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8057  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8058  %4 = bitcast <32 x i1> %3 to i32
8059  ret i32 %4
8060}
8061
; Unmasked signed greater-than compare of %__a against a <2 x i64> loaded
; from %__b, returned as an i32 bitmask with zero upper lanes.
; With AVX512VL the load is expected to fold into the xmm vpcmpgtq. Without
; it the operand is loaded with vmovdqa, the compare runs on zmm, and a
; kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
8062define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8063; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8064; VLX:       # %bb.0: # %entry
8065; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
8066; VLX-NEXT:    kmovd %k0, %eax
8067; VLX-NEXT:    retq
8068;
8069; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8070; NoVLX:       # %bb.0: # %entry
8071; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8072; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
8073; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8074; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8075; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8076; NoVLX-NEXT:    kmovw %k0, %eax
8077; NoVLX-NEXT:    vzeroupper
8078; NoVLX-NEXT:    retq
8079entry:
  ; The bitcasts in this body have identical source and destination types (no-ops).
8080  %0 = bitcast <2 x i64> %__a to <2 x i64>
8081  %load = load <2 x i64>, ptr %__b
8082  %1 = bitcast <2 x i64> %load to <2 x i64>
8083  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8084  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8085  %4 = bitcast <32 x i1> %3 to i32
8086  ret i32 %4
8087}
8088
; Masked signed greater-than compare of two <2 x i64> vectors, returned as an
; i32 bitmask. The <2 x i1> result is ANDed with the low two bits of %__u and
; widened to <32 x i1> with zero upper lanes.
; With AVX512VL the expected code is a single masked xmm vpcmpgtq. Without it
; the operands are widened to zmm and a kshiftlw/kshiftrw $14 pair clears the
; mask bits above the low two.
8089define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8090; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8091; VLX:       # %bb.0: # %entry
8092; VLX-NEXT:    kmovd %edi, %k1
8093; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8094; VLX-NEXT:    kmovd %k0, %eax
8095; VLX-NEXT:    retq
8096;
8097; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8098; NoVLX:       # %bb.0: # %entry
8099; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8100; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8101; NoVLX-NEXT:    kmovw %edi, %k1
8102; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8103; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8104; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8105; NoVLX-NEXT:    kmovw %k0, %eax
8106; NoVLX-NEXT:    vzeroupper
8107; NoVLX-NEXT:    retq
8108entry:
  ; The two bitcasts below have identical source and destination types (no-ops).
8109  %0 = bitcast <2 x i64> %__a to <2 x i64>
8110  %1 = bitcast <2 x i64> %__b to <2 x i64>
8111  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask so it lines up with the two compared lanes.
8112  %3 = bitcast i8 %__u to <8 x i1>
8113  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8114  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8115  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8116  %6 = bitcast <32 x i1> %5 to i32
8117  ret i32 %6
8118}
8119
; Masked signed greater-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i32 bitmask with only the low two bits possibly set.
; With AVX512VL the load is expected to fold into the masked xmm vpcmpgtq.
; Without it the operand is loaded with vmovdqa, the compare runs on zmm, and
; a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
8120define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8121; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8122; VLX:       # %bb.0: # %entry
8123; VLX-NEXT:    kmovd %edi, %k1
8124; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8125; VLX-NEXT:    kmovd %k0, %eax
8126; VLX-NEXT:    retq
8127;
8128; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8129; NoVLX:       # %bb.0: # %entry
8130; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8131; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
8132; NoVLX-NEXT:    kmovw %edi, %k1
8133; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8134; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8135; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8136; NoVLX-NEXT:    kmovw %k0, %eax
8137; NoVLX-NEXT:    vzeroupper
8138; NoVLX-NEXT:    retq
8139entry:
  ; The bitcasts of %__a and %load have identical source and destination types (no-ops).
8140  %0 = bitcast <2 x i64> %__a to <2 x i64>
8141  %load = load <2 x i64>, ptr %__b
8142  %1 = bitcast <2 x i64> %load to <2 x i64>
8143  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask so it lines up with the two compared lanes.
8144  %3 = bitcast i8 %__u to <8 x i1>
8145  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8146  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8147  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8148  %6 = bitcast <32 x i1> %5 to i32
8149  ret i32 %6
8150}
8151
8152
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes, returned as an i32 bitmask.
; With AVX512VL the splat is expected to become a {1to2} embedded broadcast on
; the xmm vpcmpgtq. Without it the compare uses a {1to8} broadcast on zmm and
; a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
8153define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8154; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8155; VLX:       # %bb.0: # %entry
8156; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8157; VLX-NEXT:    kmovd %k0, %eax
8158; VLX-NEXT:    retq
8159;
8160; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8161; NoVLX:       # %bb.0: # %entry
8162; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8163; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8164; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8165; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8166; NoVLX-NEXT:    kmovw %k0, %eax
8167; NoVLX-NEXT:    vzeroupper
8168; NoVLX-NEXT:    retq
8169entry:
8170  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
8171  %load = load i64, ptr %__b
8172  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8173  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8174  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8175  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8176  %4 = bitcast <32 x i1> %3 to i32
8177  ret i32 %4
8178}
8179
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i32 bitmask.
; With AVX512VL the expected code is a masked xmm vpcmpgtq with a {1to2}
; embedded broadcast. Without it the compare uses a {1to8} broadcast on zmm
; and a kshiftlw/kshiftrw $14 pair clears the mask bits above the low two.
8180define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8181; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8182; VLX:       # %bb.0: # %entry
8183; VLX-NEXT:    kmovd %edi, %k1
8184; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8185; VLX-NEXT:    kmovd %k0, %eax
8186; VLX-NEXT:    retq
8187;
8188; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8189; NoVLX:       # %bb.0: # %entry
8190; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8191; NoVLX-NEXT:    kmovw %edi, %k1
8192; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8193; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8194; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8195; NoVLX-NEXT:    kmovw %k0, %eax
8196; NoVLX-NEXT:    vzeroupper
8197; NoVLX-NEXT:    retq
8198entry:
8199  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into both lanes.
8200  %load = load i64, ptr %__b
8201  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8202  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8203  %2 = icmp sgt <2 x i64> %0, %1
  ; Take lanes 0 and 1 of the i8 mask; here the mask is the first 'and' operand.
8204  %3 = bitcast i8 %__u to <8 x i1>
8205  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8206  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
8207  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8208  %6 = bitcast <32 x i1> %5 to i32
8209  ret i32 %6
8210}
8211
8212
8213define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8214; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8215; VLX:       # %bb.0: # %entry
8216; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
8217; VLX-NEXT:    kmovq %k0, %rax
8218; VLX-NEXT:    retq
8219;
8220; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8221; NoVLX:       # %bb.0: # %entry
8222; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8223; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8224; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8225; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8226; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8227; NoVLX-NEXT:    kmovw %k0, %eax
8228; NoVLX-NEXT:    vzeroupper
8229; NoVLX-NEXT:    retq
8230entry:
8231  %0 = bitcast <2 x i64> %__a to <2 x i64>
8232  %1 = bitcast <2 x i64> %__b to <2 x i64>
8233  %2 = icmp sgt <2 x i64> %0, %1
8234  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8235  %4 = bitcast <64 x i1> %3 to i64
8236  ret i64 %4
8237}
8238
8239define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8240; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8241; VLX:       # %bb.0: # %entry
8242; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
8243; VLX-NEXT:    kmovq %k0, %rax
8244; VLX-NEXT:    retq
8245;
8246; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8247; NoVLX:       # %bb.0: # %entry
8248; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8249; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
8250; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8251; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8252; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8253; NoVLX-NEXT:    kmovw %k0, %eax
8254; NoVLX-NEXT:    vzeroupper
8255; NoVLX-NEXT:    retq
8256entry:
8257  %0 = bitcast <2 x i64> %__a to <2 x i64>
8258  %load = load <2 x i64>, ptr %__b
8259  %1 = bitcast <2 x i64> %load to <2 x i64>
8260  %2 = icmp sgt <2 x i64> %0, %1
8261  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8262  %4 = bitcast <64 x i1> %3 to i64
8263  ret i64 %4
8264}
8265
8266define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8267; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8268; VLX:       # %bb.0: # %entry
8269; VLX-NEXT:    kmovd %edi, %k1
8270; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8271; VLX-NEXT:    kmovq %k0, %rax
8272; VLX-NEXT:    retq
8273;
8274; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8275; NoVLX:       # %bb.0: # %entry
8276; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8277; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8278; NoVLX-NEXT:    kmovw %edi, %k1
8279; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8280; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8281; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8282; NoVLX-NEXT:    kmovw %k0, %eax
8283; NoVLX-NEXT:    vzeroupper
8284; NoVLX-NEXT:    retq
8285entry:
8286  %0 = bitcast <2 x i64> %__a to <2 x i64>
8287  %1 = bitcast <2 x i64> %__b to <2 x i64>
8288  %2 = icmp sgt <2 x i64> %0, %1
8289  %3 = bitcast i8 %__u to <8 x i1>
8290  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8291  %4 = and <2 x i1> %2, %extract.i
8292  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8293  %6 = bitcast <64 x i1> %5 to i64
8294  ret i64 %6
8295}
8296
8297define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8298; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8299; VLX:       # %bb.0: # %entry
8300; VLX-NEXT:    kmovd %edi, %k1
8301; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8302; VLX-NEXT:    kmovq %k0, %rax
8303; VLX-NEXT:    retq
8304;
8305; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8306; NoVLX:       # %bb.0: # %entry
8307; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8308; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
8309; NoVLX-NEXT:    kmovw %edi, %k1
8310; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8311; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8312; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8313; NoVLX-NEXT:    kmovw %k0, %eax
8314; NoVLX-NEXT:    vzeroupper
8315; NoVLX-NEXT:    retq
8316entry:
8317  %0 = bitcast <2 x i64> %__a to <2 x i64>
8318  %load = load <2 x i64>, ptr %__b
8319  %1 = bitcast <2 x i64> %load to <2 x i64>
8320  %2 = icmp sgt <2 x i64> %0, %1
8321  %3 = bitcast i8 %__u to <8 x i1>
8322  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8323  %4 = and <2 x i1> %2, %extract.i
8324  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8325  %6 = bitcast <64 x i1> %5 to i64
8326  ret i64 %6
8327}
8328
8329
8330define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8331; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8332; VLX:       # %bb.0: # %entry
8333; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8334; VLX-NEXT:    kmovq %k0, %rax
8335; VLX-NEXT:    retq
8336;
8337; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8338; NoVLX:       # %bb.0: # %entry
8339; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8340; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8341; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8342; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8343; NoVLX-NEXT:    kmovw %k0, %eax
8344; NoVLX-NEXT:    vzeroupper
8345; NoVLX-NEXT:    retq
8346entry:
8347  %0 = bitcast <2 x i64> %__a to <2 x i64>
8348  %load = load i64, ptr %__b
8349  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8350  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8351  %2 = icmp sgt <2 x i64> %0, %1
8352  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8353  %4 = bitcast <64 x i1> %3 to i64
8354  ret i64 %4
8355}
8356
8357define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8358; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8359; VLX:       # %bb.0: # %entry
8360; VLX-NEXT:    kmovd %edi, %k1
8361; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8362; VLX-NEXT:    kmovq %k0, %rax
8363; VLX-NEXT:    retq
8364;
8365; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8366; NoVLX:       # %bb.0: # %entry
8367; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8368; NoVLX-NEXT:    kmovw %edi, %k1
8369; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8370; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8371; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8372; NoVLX-NEXT:    kmovw %k0, %eax
8373; NoVLX-NEXT:    vzeroupper
8374; NoVLX-NEXT:    retq
8375entry:
8376  %0 = bitcast <2 x i64> %__a to <2 x i64>
8377  %load = load i64, ptr %__b
8378  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8379  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8380  %2 = icmp sgt <2 x i64> %0, %1
8381  %3 = bitcast i8 %__u to <8 x i1>
8382  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8383  %4 = and <2 x i1> %extract.i, %2
8384  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8385  %6 = bitcast <64 x i1> %5 to i64
8386  ret i64 %6
8387}
8388
8389
; Unmasked signed greater-than compare of two <4 x i64> vectors, returned as
; an i8 bitmask: the <4 x i1> result is widened to <8 x i1> with zero upper
; lanes.
; With AVX512VL the expected code is a single ymm vpcmpgtq followed by
; vzeroupper. Without it the operands are widened to zmm and a
; kshiftlw/kshiftrw $12 pair clears the mask bits above the low four.
8390define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8391; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8392; VLX:       # %bb.0: # %entry
8393; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8394; VLX-NEXT:    kmovd %k0, %eax
8395; VLX-NEXT:    # kill: def $al killed $al killed $eax
8396; VLX-NEXT:    vzeroupper
8397; VLX-NEXT:    retq
8398;
8399; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8400; NoVLX:       # %bb.0: # %entry
8401; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8402; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8403; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8404; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8405; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8406; NoVLX-NEXT:    kmovw %k0, %eax
8407; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8408; NoVLX-NEXT:    vzeroupper
8409; NoVLX-NEXT:    retq
8410entry:
  ; The two bitcasts below have identical source and destination types (no-ops).
8411  %0 = bitcast <4 x i64> %__a to <4 x i64>
8412  %1 = bitcast <4 x i64> %__b to <4 x i64>
8413  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are zero.
8414  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8415  %4 = bitcast <8 x i1> %3 to i8
8416  ret i8 %4
8417}
8418
; Signed greater-than compare of %__a against a full <4 x i64> loaded from
; %__b. The <4 x i1> result is widened to <8 x i1> with zero upper lanes and
; returned as an i8 mask. The AVX512VL run expects the load folded into
; vpcmpgtq; the AVX512F-only run expects a separate vmovdqa into %ymm1
; before the zmm compare and the kshiftlw/kshiftrw $12 pair.
8419define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8420; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8421; VLX:       # %bb.0: # %entry
8422; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8423; VLX-NEXT:    kmovd %k0, %eax
8424; VLX-NEXT:    # kill: def $al killed $al killed $eax
8425; VLX-NEXT:    vzeroupper
8426; VLX-NEXT:    retq
8427;
8428; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8429; NoVLX:       # %bb.0: # %entry
8430; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8431; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8432; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8433; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8434; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8435; NoVLX-NEXT:    kmovw %k0, %eax
8436; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8437; NoVLX-NEXT:    vzeroupper
8438; NoVLX-NEXT:    retq
8439entry:
8440  %0 = bitcast <4 x i64> %__a to <4 x i64>
8441  %load = load <4 x i64>, ptr %__b
8442  %1 = bitcast <4 x i64> %load to <4 x i64>
8443  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; indices 4-7 take zeroinitializer lanes.
8444  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8445  %4 = bitcast <8 x i1> %3 to i8
8446  ret i8 %4
8447}
8448
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low 4 bits of %__u are ANDed with the <4 x i1> compare result, which is
; then widened to <8 x i1> with zero upper lanes and returned as an i8 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask of
; vpcmpgtq; the AVX512F-only run additionally expects kshiftlw/kshiftrw $12.
8449define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8450; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8451; VLX:       # %bb.0: # %entry
8452; VLX-NEXT:    kmovd %edi, %k1
8453; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8454; VLX-NEXT:    kmovd %k0, %eax
8455; VLX-NEXT:    # kill: def $al killed $al killed $eax
8456; VLX-NEXT:    vzeroupper
8457; VLX-NEXT:    retq
8458;
8459; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8460; NoVLX:       # %bb.0: # %entry
8461; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8462; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8463; NoVLX-NEXT:    kmovw %edi, %k1
8464; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8465; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8466; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8467; NoVLX-NEXT:    kmovw %k0, %eax
8468; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8469; NoVLX-NEXT:    vzeroupper
8470; NoVLX-NEXT:    retq
8471entry:
8472  %0 = bitcast <4 x i64> %__a to <4 x i64>
8473  %1 = bitcast <4 x i64> %__b to <4 x i64>
8474  %2 = icmp sgt <4 x i64> %0, %1
8475  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8476  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8477  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; indices 4-7 take zeroinitializer lanes.
8478  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8479  %6 = bitcast <8 x i1> %5 to i8
8480  ret i8 %6
8481}
8482
; Masked signed greater-than compare of %__a against a full <4 x i64> loaded
; from %__b. The low 4 bits of %__u are ANDed with the <4 x i1> compare
; result, which is then widened to <8 x i1> with zero upper lanes and returned
; as an i8 mask. The AVX512VL run expects the load folded into a {%k1}-masked
; vpcmpgtq; the AVX512F-only run expects a separate vmovdqa, a masked zmm
; compare and the kshiftlw/kshiftrw $12 pair.
8483define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8484; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8485; VLX:       # %bb.0: # %entry
8486; VLX-NEXT:    kmovd %edi, %k1
8487; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8488; VLX-NEXT:    kmovd %k0, %eax
8489; VLX-NEXT:    # kill: def $al killed $al killed $eax
8490; VLX-NEXT:    vzeroupper
8491; VLX-NEXT:    retq
8492;
8493; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8494; NoVLX:       # %bb.0: # %entry
8495; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8496; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
8497; NoVLX-NEXT:    kmovw %edi, %k1
8498; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8499; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8500; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8501; NoVLX-NEXT:    kmovw %k0, %eax
8502; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8503; NoVLX-NEXT:    vzeroupper
8504; NoVLX-NEXT:    retq
8505entry:
8506  %0 = bitcast <4 x i64> %__a to <4 x i64>
8507  %load = load <4 x i64>, ptr %__b
8508  %1 = bitcast <4 x i64> %load to <4 x i64>
8509  %2 = icmp sgt <4 x i64> %0, %1
8510  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8511  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8512  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; indices 4-7 take zeroinitializer lanes.
8513  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8514  %6 = bitcast <8 x i1> %5 to i8
8515  ret i8 %6
8516}
8517
8518
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to <8 x i1>
; with zero upper lanes and returned as an i8 mask. Expected code uses a
; broadcast memory operand: {1to4} on ymm in the AVX512VL run, {1to8} on zmm
; (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8519define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8520; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8521; VLX:       # %bb.0: # %entry
8522; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8523; VLX-NEXT:    kmovd %k0, %eax
8524; VLX-NEXT:    # kill: def $al killed $al killed $eax
8525; VLX-NEXT:    vzeroupper
8526; VLX-NEXT:    retq
8527;
8528; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8529; NoVLX:       # %bb.0: # %entry
8530; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8531; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8532; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8533; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8534; NoVLX-NEXT:    kmovw %k0, %eax
8535; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8536; NoVLX-NEXT:    vzeroupper
8537; NoVLX-NEXT:    retq
8538entry:
8539  %0 = bitcast <4 x i64> %__a to <4 x i64>
8540  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8541  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8542  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8543  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; indices 4-7 take zeroinitializer lanes.
8544  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8545  %4 = bitcast <8 x i1> %3 to i8
8546  ret i8 %4
8547}
8548
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u are ANDed with the
; <4 x i1> compare result, which is then widened to <8 x i1> with zero upper
; lanes and returned as an i8 mask. Expected code uses a {%k1}-masked
; vpcmpgtq with a broadcast memory operand: {1to4} on ymm in the AVX512VL
; run, {1to8} on zmm (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8549define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8550; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8551; VLX:       # %bb.0: # %entry
8552; VLX-NEXT:    kmovd %edi, %k1
8553; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8554; VLX-NEXT:    kmovd %k0, %eax
8555; VLX-NEXT:    # kill: def $al killed $al killed $eax
8556; VLX-NEXT:    vzeroupper
8557; VLX-NEXT:    retq
8558;
8559; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8560; NoVLX:       # %bb.0: # %entry
8561; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8562; NoVLX-NEXT:    kmovw %edi, %k1
8563; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8564; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8565; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8566; NoVLX-NEXT:    kmovw %k0, %eax
8567; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8568; NoVLX-NEXT:    vzeroupper
8569; NoVLX-NEXT:    retq
8570entry:
8571  %0 = bitcast <4 x i64> %__a to <4 x i64>
8572  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8573  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8574  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8575  %2 = icmp sgt <4 x i64> %0, %1
8576  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8577  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8578  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 take the masked lanes; indices 4-7 take zeroinitializer lanes.
8579  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8580  %6 = bitcast <8 x i1> %5 to i8
8581  ret i8 %6
8582}
8583
8584
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <16 x i1> with zero upper lanes and returned
; as an i16 mask. The AVX512VL run expects a ymm vpcmpgtq into %k0; the
; AVX512F-only run expects a zmm vpcmpgtq followed by kshiftlw/kshiftrw $12.
8585define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8586; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8587; VLX:       # %bb.0: # %entry
8588; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8589; VLX-NEXT:    kmovd %k0, %eax
8590; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8591; VLX-NEXT:    vzeroupper
8592; VLX-NEXT:    retq
8593;
8594; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8595; NoVLX:       # %bb.0: # %entry
8596; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8597; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8598; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8599; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8600; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8601; NoVLX-NEXT:    kmovw %k0, %eax
8602; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8603; NoVLX-NEXT:    vzeroupper
8604; NoVLX-NEXT:    retq
8605entry:
8606  %0 = bitcast <4 x i64> %__a to <4 x i64>
8607  %1 = bitcast <4 x i64> %__b to <4 x i64>
8608  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8609  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8610  %4 = bitcast <16 x i1> %3 to i16
8611  ret i16 %4
8612}
8613
; Signed greater-than compare of %__a against a full <4 x i64> loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as an i16 mask. The AVX512VL run expects the load folded into
; vpcmpgtq; the AVX512F-only run expects a separate vmovdqa into %ymm1
; before the zmm compare and the kshiftlw/kshiftrw $12 pair.
8614define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8615; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8616; VLX:       # %bb.0: # %entry
8617; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8618; VLX-NEXT:    kmovd %k0, %eax
8619; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8620; VLX-NEXT:    vzeroupper
8621; VLX-NEXT:    retq
8622;
8623; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8624; NoVLX:       # %bb.0: # %entry
8625; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8626; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8627; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8628; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8629; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8630; NoVLX-NEXT:    kmovw %k0, %eax
8631; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8632; NoVLX-NEXT:    vzeroupper
8633; NoVLX-NEXT:    retq
8634entry:
8635  %0 = bitcast <4 x i64> %__a to <4 x i64>
8636  %load = load <4 x i64>, ptr %__b
8637  %1 = bitcast <4 x i64> %load to <4 x i64>
8638  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8639  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8640  %4 = bitcast <16 x i1> %3 to i16
8641  ret i16 %4
8642}
8643
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low 4 bits of %__u are ANDed with the <4 x i1> compare result, which is
; then widened to <16 x i1> with zero upper lanes and returned as an i16 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask of
; vpcmpgtq; the AVX512F-only run additionally expects kshiftlw/kshiftrw $12.
8644define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8645; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8646; VLX:       # %bb.0: # %entry
8647; VLX-NEXT:    kmovd %edi, %k1
8648; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8649; VLX-NEXT:    kmovd %k0, %eax
8650; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8651; VLX-NEXT:    vzeroupper
8652; VLX-NEXT:    retq
8653;
8654; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8655; NoVLX:       # %bb.0: # %entry
8656; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8657; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8658; NoVLX-NEXT:    kmovw %edi, %k1
8659; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8660; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8661; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8662; NoVLX-NEXT:    kmovw %k0, %eax
8663; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8664; NoVLX-NEXT:    vzeroupper
8665; NoVLX-NEXT:    retq
8666entry:
8667  %0 = bitcast <4 x i64> %__a to <4 x i64>
8668  %1 = bitcast <4 x i64> %__b to <4 x i64>
8669  %2 = icmp sgt <4 x i64> %0, %1
8670  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8671  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8672  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8673  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8674  %6 = bitcast <16 x i1> %5 to i16
8675  ret i16 %6
8676}
8677
; Masked signed greater-than compare of %__a against a full <4 x i64> loaded
; from %__b. The low 4 bits of %__u are ANDed with the <4 x i1> compare
; result, which is then widened to <16 x i1> with zero upper lanes and
; returned as an i16 mask. The AVX512VL run expects the load folded into a
; {%k1}-masked vpcmpgtq; the AVX512F-only run expects a separate vmovdqa, a
; masked zmm compare and the kshiftlw/kshiftrw $12 pair.
8678define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8679; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8680; VLX:       # %bb.0: # %entry
8681; VLX-NEXT:    kmovd %edi, %k1
8682; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8683; VLX-NEXT:    kmovd %k0, %eax
8684; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8685; VLX-NEXT:    vzeroupper
8686; VLX-NEXT:    retq
8687;
8688; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8689; NoVLX:       # %bb.0: # %entry
8690; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8691; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
8692; NoVLX-NEXT:    kmovw %edi, %k1
8693; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8694; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8695; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8696; NoVLX-NEXT:    kmovw %k0, %eax
8697; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8698; NoVLX-NEXT:    vzeroupper
8699; NoVLX-NEXT:    retq
8700entry:
8701  %0 = bitcast <4 x i64> %__a to <4 x i64>
8702  %load = load <4 x i64>, ptr %__b
8703  %1 = bitcast <4 x i64> %load to <4 x i64>
8704  %2 = icmp sgt <4 x i64> %0, %1
8705  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8706  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8707  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8708  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8709  %6 = bitcast <16 x i1> %5 to i16
8710  ret i16 %6
8711}
8712
8713
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to <16 x i1>
; with zero upper lanes and returned as an i16 mask. Expected code uses a
; broadcast memory operand: {1to4} on ymm in the AVX512VL run, {1to8} on zmm
; (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8714define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8715; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8716; VLX:       # %bb.0: # %entry
8717; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8718; VLX-NEXT:    kmovd %k0, %eax
8719; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8720; VLX-NEXT:    vzeroupper
8721; VLX-NEXT:    retq
8722;
8723; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8724; NoVLX:       # %bb.0: # %entry
8725; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8726; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8727; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8728; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8729; NoVLX-NEXT:    kmovw %k0, %eax
8730; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8731; NoVLX-NEXT:    vzeroupper
8732; NoVLX-NEXT:    retq
8733entry:
8734  %0 = bitcast <4 x i64> %__a to <4 x i64>
8735  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8736  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8737  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8738  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8739  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8740  %4 = bitcast <16 x i1> %3 to i16
8741  ret i16 %4
8742}
8743
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u are ANDed with the
; <4 x i1> compare result, which is then widened to <16 x i1> with zero upper
; lanes and returned as an i16 mask. Expected code uses a {%k1}-masked
; vpcmpgtq with a broadcast memory operand: {1to4} on ymm in the AVX512VL
; run, {1to8} on zmm (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8744define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8745; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8746; VLX:       # %bb.0: # %entry
8747; VLX-NEXT:    kmovd %edi, %k1
8748; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8749; VLX-NEXT:    kmovd %k0, %eax
8750; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8751; VLX-NEXT:    vzeroupper
8752; VLX-NEXT:    retq
8753;
8754; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8755; NoVLX:       # %bb.0: # %entry
8756; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8757; NoVLX-NEXT:    kmovw %edi, %k1
8758; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8759; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8760; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8761; NoVLX-NEXT:    kmovw %k0, %eax
8762; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8763; NoVLX-NEXT:    vzeroupper
8764; NoVLX-NEXT:    retq
8765entry:
8766  %0 = bitcast <4 x i64> %__a to <4 x i64>
8767  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8768  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8769  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8770  %2 = icmp sgt <4 x i64> %0, %1
8771  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8772  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8773  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8774  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8775  %6 = bitcast <16 x i1> %5 to i16
8776  ret i16 %6
8777}
8778
8779
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <32 x i1> with zero upper lanes and returned
; as an i32 mask. The AVX512VL run expects a ymm vpcmpgtq into %k0; the
; AVX512F-only run expects a zmm vpcmpgtq followed by kshiftlw/kshiftrw $12.
8780define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8781; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8782; VLX:       # %bb.0: # %entry
8783; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8784; VLX-NEXT:    kmovd %k0, %eax
8785; VLX-NEXT:    vzeroupper
8786; VLX-NEXT:    retq
8787;
8788; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8789; NoVLX:       # %bb.0: # %entry
8790; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8791; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8792; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8793; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8794; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8795; NoVLX-NEXT:    kmovw %k0, %eax
8796; NoVLX-NEXT:    vzeroupper
8797; NoVLX-NEXT:    retq
8798entry:
8799  %0 = bitcast <4 x i64> %__a to <4 x i64>
8800  %1 = bitcast <4 x i64> %__b to <4 x i64>
8801  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8802  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8803  %4 = bitcast <32 x i1> %3 to i32
8804  ret i32 %4
8805}
8806
; Signed greater-than compare of %__a against a full <4 x i64> loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> with zero upper lanes and
; returned as an i32 mask. The AVX512VL run expects the load folded into
; vpcmpgtq; the AVX512F-only run expects a separate vmovdqa into %ymm1
; before the zmm compare and the kshiftlw/kshiftrw $12 pair.
8807define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8808; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8809; VLX:       # %bb.0: # %entry
8810; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8811; VLX-NEXT:    kmovd %k0, %eax
8812; VLX-NEXT:    vzeroupper
8813; VLX-NEXT:    retq
8814;
8815; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8816; NoVLX:       # %bb.0: # %entry
8817; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8818; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8819; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8820; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8821; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8822; NoVLX-NEXT:    kmovw %k0, %eax
8823; NoVLX-NEXT:    vzeroupper
8824; NoVLX-NEXT:    retq
8825entry:
8826  %0 = bitcast <4 x i64> %__a to <4 x i64>
8827  %load = load <4 x i64>, ptr %__b
8828  %1 = bitcast <4 x i64> %load to <4 x i64>
8829  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8830  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8831  %4 = bitcast <32 x i1> %3 to i32
8832  ret i32 %4
8833}
8834
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low 4 bits of %__u are ANDed with the <4 x i1> compare result, which is
; then widened to <32 x i1> with zero upper lanes and returned as an i32 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask of
; vpcmpgtq; the AVX512F-only run additionally expects kshiftlw/kshiftrw $12.
8835define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8836; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8837; VLX:       # %bb.0: # %entry
8838; VLX-NEXT:    kmovd %edi, %k1
8839; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8840; VLX-NEXT:    kmovd %k0, %eax
8841; VLX-NEXT:    vzeroupper
8842; VLX-NEXT:    retq
8843;
8844; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8845; NoVLX:       # %bb.0: # %entry
8846; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8847; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8848; NoVLX-NEXT:    kmovw %edi, %k1
8849; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8850; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8851; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8852; NoVLX-NEXT:    kmovw %k0, %eax
8853; NoVLX-NEXT:    vzeroupper
8854; NoVLX-NEXT:    retq
8855entry:
8856  %0 = bitcast <4 x i64> %__a to <4 x i64>
8857  %1 = bitcast <4 x i64> %__b to <4 x i64>
8858  %2 = icmp sgt <4 x i64> %0, %1
8859  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8860  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8861  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8862  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8863  %6 = bitcast <32 x i1> %5 to i32
8864  ret i32 %6
8865}
8866
; Masked signed greater-than compare of %__a against a full <4 x i64> loaded
; from %__b. The low 4 bits of %__u are ANDed with the <4 x i1> compare
; result, which is then widened to <32 x i1> with zero upper lanes and
; returned as an i32 mask. The AVX512VL run expects the load folded into a
; {%k1}-masked vpcmpgtq; the AVX512F-only run expects a separate vmovdqa, a
; masked zmm compare and the kshiftlw/kshiftrw $12 pair.
8867define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8868; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8869; VLX:       # %bb.0: # %entry
8870; VLX-NEXT:    kmovd %edi, %k1
8871; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8872; VLX-NEXT:    kmovd %k0, %eax
8873; VLX-NEXT:    vzeroupper
8874; VLX-NEXT:    retq
8875;
8876; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8877; NoVLX:       # %bb.0: # %entry
8878; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8879; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
8880; NoVLX-NEXT:    kmovw %edi, %k1
8881; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8882; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8883; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8884; NoVLX-NEXT:    kmovw %k0, %eax
8885; NoVLX-NEXT:    vzeroupper
8886; NoVLX-NEXT:    retq
8887entry:
8888  %0 = bitcast <4 x i64> %__a to <4 x i64>
8889  %load = load <4 x i64>, ptr %__b
8890  %1 = bitcast <4 x i64> %load to <4 x i64>
8891  %2 = icmp sgt <4 x i64> %0, %1
8892  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8893  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8894  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8895  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8896  %6 = bitcast <32 x i1> %5 to i32
8897  ret i32 %6
8898}
8899
8900
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to <32 x i1>
; with zero upper lanes and returned as an i32 mask. Expected code uses a
; broadcast memory operand: {1to4} on ymm in the AVX512VL run, {1to8} on zmm
; (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8901define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8902; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8903; VLX:       # %bb.0: # %entry
8904; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8905; VLX-NEXT:    kmovd %k0, %eax
8906; VLX-NEXT:    vzeroupper
8907; VLX-NEXT:    retq
8908;
8909; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8910; NoVLX:       # %bb.0: # %entry
8911; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8912; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8913; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8914; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8915; NoVLX-NEXT:    kmovw %k0, %eax
8916; NoVLX-NEXT:    vzeroupper
8917; NoVLX-NEXT:    retq
8918entry:
8919  %0 = bitcast <4 x i64> %__a to <4 x i64>
8920  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8921  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8922  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8923  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8924  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8925  %4 = bitcast <32 x i1> %3 to i32
8926  ret i32 %4
8927}
8928
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u are ANDed with the
; <4 x i1> compare result, which is then widened to <32 x i1> with zero upper
; lanes and returned as an i32 mask. Expected code uses a {%k1}-masked
; vpcmpgtq with a broadcast memory operand: {1to4} on ymm in the AVX512VL
; run, {1to8} on zmm (plus kshiftlw/kshiftrw $12) in the AVX512F-only run.
8929define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8930; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8931; VLX:       # %bb.0: # %entry
8932; VLX-NEXT:    kmovd %edi, %k1
8933; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8934; VLX-NEXT:    kmovd %k0, %eax
8935; VLX-NEXT:    vzeroupper
8936; VLX-NEXT:    retq
8937;
8938; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8939; NoVLX:       # %bb.0: # %entry
8940; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8941; NoVLX-NEXT:    kmovw %edi, %k1
8942; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8943; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8944; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8945; NoVLX-NEXT:    kmovw %k0, %eax
8946; NoVLX-NEXT:    vzeroupper
8947; NoVLX-NEXT:    retq
8948entry:
8949  %0 = bitcast <4 x i64> %__a to <4 x i64>
8950  %load = load i64, ptr %__b
  ; Insert the scalar in lane 0, then replicate lane 0 into every lane.
8951  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8952  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8953  %2 = icmp sgt <4 x i64> %0, %1
8954  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
8955  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8956  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8957  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8958  %6 = bitcast <32 x i1> %5 to i32
8959  ret i32 %6
8960}
8961
8962
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <64 x i1> with zero upper lanes and returned
; as an i64 mask. The AVX512VL run expects a ymm vpcmpgtq into %k0 read back
; with kmovq; the AVX512F-only run expects a zmm vpcmpgtq followed by
; kshiftlw/kshiftrw $12 and a kmovw into %eax.
8963define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8964; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8965; VLX:       # %bb.0: # %entry
8966; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8967; VLX-NEXT:    kmovq %k0, %rax
8968; VLX-NEXT:    vzeroupper
8969; VLX-NEXT:    retq
8970;
8971; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8972; NoVLX:       # %bb.0: # %entry
8973; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8974; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8975; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8976; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8977; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8978; NoVLX-NEXT:    kmovw %k0, %eax
8979; NoVLX-NEXT:    vzeroupper
8980; NoVLX-NEXT:    retq
8981entry:
8982  %0 = bitcast <4 x i64> %__a to <4 x i64>
8983  %1 = bitcast <4 x i64> %__b to <4 x i64>
8984  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
8985  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8986  %4 = bitcast <64 x i1> %3 to i64
8987  ret i64 %4
8988}
8989
; Signed greater-than compare of %__a against a full <4 x i64> loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64 mask. The AVX512VL run expects the load folded into
; vpcmpgtq and a kmovq readback; the AVX512F-only run expects a separate
; vmovdqa into %ymm1, the zmm compare, kshiftlw/kshiftrw $12 and a kmovw.
8990define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8991; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8992; VLX:       # %bb.0: # %entry
8993; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8994; VLX-NEXT:    kmovq %k0, %rax
8995; VLX-NEXT:    vzeroupper
8996; VLX-NEXT:    retq
8997;
8998; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8999; NoVLX:       # %bb.0: # %entry
9000; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9001; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
9002; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9003; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9004; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9005; NoVLX-NEXT:    kmovw %k0, %eax
9006; NoVLX-NEXT:    vzeroupper
9007; NoVLX-NEXT:    retq
9008entry:
9009  %0 = bitcast <4 x i64> %__a to <4 x i64>
9010  %load = load <4 x i64>, ptr %__b
9011  %1 = bitcast <4 x i64> %load to <4 x i64>
9012  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
9013  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9014  %4 = bitcast <64 x i1> %3 to i64
9015  ret i64 %4
9016}
9017
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low 4 bits of %__u are ANDed with the <4 x i1> compare result, which is
; then widened to <64 x i1> with zero upper lanes and returned as an i64 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask of
; vpcmpgtq; the AVX512VL run reads the result with kmovq, the AVX512F-only
; run with kshiftlw/kshiftrw $12 followed by kmovw.
9018define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9019; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9020; VLX:       # %bb.0: # %entry
9021; VLX-NEXT:    kmovd %edi, %k1
9022; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9023; VLX-NEXT:    kmovq %k0, %rax
9024; VLX-NEXT:    vzeroupper
9025; VLX-NEXT:    retq
9026;
9027; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9028; NoVLX:       # %bb.0: # %entry
9029; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
9030; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9031; NoVLX-NEXT:    kmovw %edi, %k1
9032; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9033; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9034; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9035; NoVLX-NEXT:    kmovw %k0, %eax
9036; NoVLX-NEXT:    vzeroupper
9037; NoVLX-NEXT:    retq
9038entry:
9039  %0 = bitcast <4 x i64> %__a to <4 x i64>
9040  %1 = bitcast <4 x i64> %__b to <4 x i64>
9041  %2 = icmp sgt <4 x i64> %0, %1
9042  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
9043  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9044  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
9045  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9046  %6 = bitcast <64 x i1> %5 to i64
9047  ret i64 %6
9048}
9049
; Masked signed greater-than compare of %__a against a full <4 x i64> loaded
; from %__b. The low 4 bits of %__u are ANDed with the <4 x i1> compare
; result, which is then widened to <64 x i1> with zero upper lanes and
; returned as an i64 mask. The AVX512VL run expects the load folded into a
; {%k1}-masked vpcmpgtq and a kmovq readback; the AVX512F-only run expects a
; separate vmovdqa, a masked zmm compare, kshiftlw/kshiftrw $12 and a kmovw.
9050define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9051; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9052; VLX:       # %bb.0: # %entry
9053; VLX-NEXT:    kmovd %edi, %k1
9054; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9055; VLX-NEXT:    kmovq %k0, %rax
9056; VLX-NEXT:    vzeroupper
9057; VLX-NEXT:    retq
9058;
9059; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9060; NoVLX:       # %bb.0: # %entry
9061; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9062; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
9063; NoVLX-NEXT:    kmovw %edi, %k1
9064; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9065; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9066; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9067; NoVLX-NEXT:    kmovw %k0, %eax
9068; NoVLX-NEXT:    vzeroupper
9069; NoVLX-NEXT:    retq
9070entry:
9071  %0 = bitcast <4 x i64> %__a to <4 x i64>
9072  %load = load <4 x i64>, ptr %__b
9073  %1 = bitcast <4 x i64> %load to <4 x i64>
9074  %2 = icmp sgt <4 x i64> %0, %1
9075  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only lanes 0-3 of the incoming mask to match the 4-lane compare.
9076  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9077  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 take the masked lanes; the repeated 4-7 indices all take
  ; zeroinitializer lanes.
9078  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9079  %6 = bitcast <64 x i1> %5 to i64
9080  ret i64 %6
9081}
9082
9083
; Signed greater-than compare of <4 x i64> %__a against a single i64 loaded
; from %__b and splatted to all four lanes. The 4 compare bits are widened to
; 64 mask lanes and returned as an i64.
; The +avx512vl run expects an embedded-broadcast ymm compare ({1to4}); the
; avx512f-only run expects a {1to8} zmm compare followed by a
; kshiftlw/kshiftrw $12 pair that clears the upper 12 mask bits.
9084define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
9085; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9086; VLX:       # %bb.0: # %entry
9087; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9088; VLX-NEXT:    kmovq %k0, %rax
9089; VLX-NEXT:    vzeroupper
9090; VLX-NEXT:    retq
9091;
9092; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9093; NoVLX:       # %bb.0: # %entry
9094; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9095; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9096; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9097; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9098; NoVLX-NEXT:    kmovw %k0, %eax
9099; NoVLX-NEXT:    vzeroupper
9100; NoVLX-NEXT:    retq
9101entry:
9102  %0 = bitcast <4 x i64> %__a to <4 x i64>
9103  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9104  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9105  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9106  %2 = icmp sgt <4 x i64> %0, %1
  ; Indices 0-3 take the compare bits; indices 4-7 name lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
9107  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9108  %4 = bitcast <64 x i1> %3 to i64
9109  ret i64 %4
9110}
9111
; Masked signed greater-than compare of <4 x i64> %__a against a single i64
; loaded from %__b and splatted to all four lanes. The compare bits are ANDed
; with the low 4 lanes of %__u, widened to 64 mask lanes and returned as i64.
; The +avx512vl run expects a {1to4} ymm compare under {%k1}; the
; avx512f-only run expects a {1to8} zmm compare under {%k1} followed by a
; kshiftlw/kshiftrw $12 pair that clears the upper 12 mask bits.
9112define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9113; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9114; VLX:       # %bb.0: # %entry
9115; VLX-NEXT:    kmovd %edi, %k1
9116; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9117; VLX-NEXT:    kmovq %k0, %rax
9118; VLX-NEXT:    vzeroupper
9119; VLX-NEXT:    retq
9120;
9121; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9122; NoVLX:       # %bb.0: # %entry
9123; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9124; NoVLX-NEXT:    kmovw %edi, %k1
9125; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9126; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9127; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9128; NoVLX-NEXT:    kmovw %k0, %eax
9129; NoVLX-NEXT:    vzeroupper
9130; NoVLX-NEXT:    retq
9131entry:
9132  %0 = bitcast <4 x i64> %__a to <4 x i64>
9133  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9134  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9135  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9136  %2 = icmp sgt <4 x i64> %0, %1
9137  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the low 4 lanes of the 8-lane mask.
9138  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9139  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 take the masked compare bits; indices 4-7 name lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
9140  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9141  %6 = bitcast <64 x i1> %5 to i64
9142  ret i64 %6
9143}
9144
9145
; Signed greater-than compare of <8 x i64> %__a and %__b. The 8 compare bits
; are widened to 16 mask lanes and returned as an i16.
; Both runs expect a single zmm vpcmpgtq into %k0; they differ only in the
; kmov form used to copy the mask to %eax (kmovd vs. kmovw).
9146define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9147; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9148; VLX:       # %bb.0: # %entry
9149; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9150; VLX-NEXT:    kmovd %k0, %eax
9151; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9152; VLX-NEXT:    vzeroupper
9153; VLX-NEXT:    retq
9154;
9155; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9156; NoVLX:       # %bb.0: # %entry
9157; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9158; NoVLX-NEXT:    kmovw %k0, %eax
9159; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9160; NoVLX-NEXT:    vzeroupper
9161; NoVLX-NEXT:    retq
9162entry:
9163  %0 = bitcast <8 x i64> %__a to <8 x i64>
9164  %1 = bitcast <8 x i64> %__b to <8 x i64>
9165  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9166  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9167  %4 = bitcast <16 x i1> %3 to i16
9168  ret i16 %4
9169}
9170
; Signed greater-than compare of <8 x i64> %__a against a <8 x i64> loaded
; from %__b. The 8 compare bits are widened to 16 mask lanes and returned as
; an i16.
; Both runs expect the load folded into a zmm vpcmpgtq; they differ only in
; the kmov form used to copy the mask to %eax (kmovd vs. kmovw).
9171define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9172; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9173; VLX:       # %bb.0: # %entry
9174; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9175; VLX-NEXT:    kmovd %k0, %eax
9176; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9177; VLX-NEXT:    vzeroupper
9178; VLX-NEXT:    retq
9179;
9180; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9181; NoVLX:       # %bb.0: # %entry
9182; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9183; NoVLX-NEXT:    kmovw %k0, %eax
9184; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9185; NoVLX-NEXT:    vzeroupper
9186; NoVLX-NEXT:    retq
9187entry:
9188  %0 = bitcast <8 x i64> %__a to <8 x i64>
9189  %load = load <8 x i64>, ptr %__b
9190  %1 = bitcast <8 x i64> %load to <8 x i64>
9191  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9192  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9193  %4 = bitcast <16 x i1> %3 to i16
9194  ret i16 %4
9195}
9196
; Masked signed greater-than compare of <8 x i64> %__a and %__b. The compare
; bits are ANDed with %__u (viewed as <8 x i1>), widened to 16 mask lanes and
; returned as an i16.
; Both runs expect the mask moved into %k1 and applied as a write-mask on a
; zmm vpcmpgtq; they differ only in the kmov forms used.
9197define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9198; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9199; VLX:       # %bb.0: # %entry
9200; VLX-NEXT:    kmovd %edi, %k1
9201; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9202; VLX-NEXT:    kmovd %k0, %eax
9203; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9204; VLX-NEXT:    vzeroupper
9205; VLX-NEXT:    retq
9206;
9207; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9208; NoVLX:       # %bb.0: # %entry
9209; NoVLX-NEXT:    kmovw %edi, %k1
9210; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9211; NoVLX-NEXT:    kmovw %k0, %eax
9212; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9213; NoVLX-NEXT:    vzeroupper
9214; NoVLX-NEXT:    retq
9215entry:
9216  %0 = bitcast <8 x i64> %__a to <8 x i64>
9217  %1 = bitcast <8 x i64> %__b to <8 x i64>
9218  %2 = icmp sgt <8 x i64> %0, %1
9219  %3 = bitcast i8 %__u to <8 x i1>
9220  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9221  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9222  %6 = bitcast <16 x i1> %5 to i16
9223  ret i16 %6
9224}
9225
; Masked signed greater-than compare of <8 x i64> %__a against a <8 x i64>
; loaded from %__b. The compare bits are ANDed with %__u (viewed as
; <8 x i1>), widened to 16 mask lanes and returned as an i16.
; Both runs expect the load folded into a zmm vpcmpgtq under {%k1}; they
; differ only in the kmov forms used.
9226define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9227; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9228; VLX:       # %bb.0: # %entry
9229; VLX-NEXT:    kmovd %edi, %k1
9230; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9231; VLX-NEXT:    kmovd %k0, %eax
9232; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9233; VLX-NEXT:    vzeroupper
9234; VLX-NEXT:    retq
9235;
9236; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9237; NoVLX:       # %bb.0: # %entry
9238; NoVLX-NEXT:    kmovw %edi, %k1
9239; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9240; NoVLX-NEXT:    kmovw %k0, %eax
9241; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9242; NoVLX-NEXT:    vzeroupper
9243; NoVLX-NEXT:    retq
9244entry:
9245  %0 = bitcast <8 x i64> %__a to <8 x i64>
9246  %load = load <8 x i64>, ptr %__b
9247  %1 = bitcast <8 x i64> %load to <8 x i64>
9248  %2 = icmp sgt <8 x i64> %0, %1
9249  %3 = bitcast i8 %__u to <8 x i1>
9250  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9251  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9252  %6 = bitcast <16 x i1> %5 to i16
9253  ret i16 %6
9254}
9255
9256
; Signed greater-than compare of <8 x i64> %__a against a single i64 loaded
; from %__b and splatted to all eight lanes. The 8 compare bits are widened
; to 16 mask lanes and returned as an i16.
; Both runs expect an embedded-broadcast zmm compare ({1to8}); they differ
; only in the kmov form used to copy the mask to %eax.
9257define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9258; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9259; VLX:       # %bb.0: # %entry
9260; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9261; VLX-NEXT:    kmovd %k0, %eax
9262; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9263; VLX-NEXT:    vzeroupper
9264; VLX-NEXT:    retq
9265;
9266; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9267; NoVLX:       # %bb.0: # %entry
9268; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9269; NoVLX-NEXT:    kmovw %k0, %eax
9270; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9271; NoVLX-NEXT:    vzeroupper
9272; NoVLX-NEXT:    retq
9273entry:
9274  %0 = bitcast <8 x i64> %__a to <8 x i64>
9275  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9276  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9277  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9278  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9279  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9280  %4 = bitcast <16 x i1> %3 to i16
9281  ret i16 %4
9282}
9283
; Masked signed greater-than compare of <8 x i64> %__a against a single i64
; loaded from %__b and splatted to all eight lanes. The compare bits are
; ANDed with %__u (viewed as <8 x i1>), widened to 16 mask lanes and returned
; as an i16.
; Both runs expect a {1to8} zmm compare under {%k1}; they differ only in the
; kmov forms used.
9284define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9285; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9286; VLX:       # %bb.0: # %entry
9287; VLX-NEXT:    kmovd %edi, %k1
9288; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9289; VLX-NEXT:    kmovd %k0, %eax
9290; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9291; VLX-NEXT:    vzeroupper
9292; VLX-NEXT:    retq
9293;
9294; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9295; NoVLX:       # %bb.0: # %entry
9296; NoVLX-NEXT:    kmovw %edi, %k1
9297; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9298; NoVLX-NEXT:    kmovw %k0, %eax
9299; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9300; NoVLX-NEXT:    vzeroupper
9301; NoVLX-NEXT:    retq
9302entry:
9303  %0 = bitcast <8 x i64> %__a to <8 x i64>
9304  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9305  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9306  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9307  %2 = icmp sgt <8 x i64> %0, %1
9308  %3 = bitcast i8 %__u to <8 x i1>
9309  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
9310  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9311  %6 = bitcast <16 x i1> %5 to i16
9312  ret i16 %6
9313}
9314
9315
; Signed greater-than compare of <8 x i64> %__a and %__b. The 8 compare bits
; are widened to 32 mask lanes and returned as an i32.
; Both runs expect a single zmm vpcmpgtq into %k0; they differ only in the
; kmov form used to copy the mask to %eax (kmovd vs. kmovw).
9316define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9317; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9318; VLX:       # %bb.0: # %entry
9319; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9320; VLX-NEXT:    kmovd %k0, %eax
9321; VLX-NEXT:    vzeroupper
9322; VLX-NEXT:    retq
9323;
9324; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9325; NoVLX:       # %bb.0: # %entry
9326; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9327; NoVLX-NEXT:    kmovw %k0, %eax
9328; NoVLX-NEXT:    vzeroupper
9329; NoVLX-NEXT:    retq
9330entry:
9331  %0 = bitcast <8 x i64> %__a to <8 x i64>
9332  %1 = bitcast <8 x i64> %__b to <8 x i64>
9333  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9334  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9335  %4 = bitcast <32 x i1> %3 to i32
9336  ret i32 %4
9337}
9338
; Signed greater-than compare of <8 x i64> %__a against a <8 x i64> loaded
; from %__b. The 8 compare bits are widened to 32 mask lanes and returned as
; an i32.
; Both runs expect the load folded into a zmm vpcmpgtq; they differ only in
; the kmov form used to copy the mask to %eax (kmovd vs. kmovw).
9339define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9340; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9341; VLX:       # %bb.0: # %entry
9342; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9343; VLX-NEXT:    kmovd %k0, %eax
9344; VLX-NEXT:    vzeroupper
9345; VLX-NEXT:    retq
9346;
9347; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9348; NoVLX:       # %bb.0: # %entry
9349; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9350; NoVLX-NEXT:    kmovw %k0, %eax
9351; NoVLX-NEXT:    vzeroupper
9352; NoVLX-NEXT:    retq
9353entry:
9354  %0 = bitcast <8 x i64> %__a to <8 x i64>
9355  %load = load <8 x i64>, ptr %__b
9356  %1 = bitcast <8 x i64> %load to <8 x i64>
9357  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9358  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9359  %4 = bitcast <32 x i1> %3 to i32
9360  ret i32 %4
9361}
9362
; Masked signed greater-than compare of <8 x i64> %__a and %__b. The compare
; bits are ANDed with %__u (viewed as <8 x i1>), widened to 32 mask lanes and
; returned as an i32.
; Both runs expect the mask moved into %k1 and applied as a write-mask on a
; zmm vpcmpgtq; they differ only in the kmov forms used.
9363define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9364; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9365; VLX:       # %bb.0: # %entry
9366; VLX-NEXT:    kmovd %edi, %k1
9367; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9368; VLX-NEXT:    kmovd %k0, %eax
9369; VLX-NEXT:    vzeroupper
9370; VLX-NEXT:    retq
9371;
9372; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9373; NoVLX:       # %bb.0: # %entry
9374; NoVLX-NEXT:    kmovw %edi, %k1
9375; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9376; NoVLX-NEXT:    kmovw %k0, %eax
9377; NoVLX-NEXT:    vzeroupper
9378; NoVLX-NEXT:    retq
9379entry:
9380  %0 = bitcast <8 x i64> %__a to <8 x i64>
9381  %1 = bitcast <8 x i64> %__b to <8 x i64>
9382  %2 = icmp sgt <8 x i64> %0, %1
9383  %3 = bitcast i8 %__u to <8 x i1>
9384  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9385  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9386  %6 = bitcast <32 x i1> %5 to i32
9387  ret i32 %6
9388}
9389
; Masked signed greater-than compare of <8 x i64> %__a against a <8 x i64>
; loaded from %__b. The compare bits are ANDed with %__u (viewed as
; <8 x i1>), widened to 32 mask lanes and returned as an i32.
; Both runs expect the load folded into a zmm vpcmpgtq under {%k1}; they
; differ only in the kmov forms used.
9390define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9391; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9392; VLX:       # %bb.0: # %entry
9393; VLX-NEXT:    kmovd %edi, %k1
9394; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9395; VLX-NEXT:    kmovd %k0, %eax
9396; VLX-NEXT:    vzeroupper
9397; VLX-NEXT:    retq
9398;
9399; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9400; NoVLX:       # %bb.0: # %entry
9401; NoVLX-NEXT:    kmovw %edi, %k1
9402; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9403; NoVLX-NEXT:    kmovw %k0, %eax
9404; NoVLX-NEXT:    vzeroupper
9405; NoVLX-NEXT:    retq
9406entry:
9407  %0 = bitcast <8 x i64> %__a to <8 x i64>
9408  %load = load <8 x i64>, ptr %__b
9409  %1 = bitcast <8 x i64> %load to <8 x i64>
9410  %2 = icmp sgt <8 x i64> %0, %1
9411  %3 = bitcast i8 %__u to <8 x i1>
9412  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9413  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9414  %6 = bitcast <32 x i1> %5 to i32
9415  ret i32 %6
9416}
9417
9418
; Signed greater-than compare of <8 x i64> %__a against a single i64 loaded
; from %__b and splatted to all eight lanes. The 8 compare bits are widened
; to 32 mask lanes and returned as an i32.
; Both runs expect an embedded-broadcast zmm compare ({1to8}); they differ
; only in the kmov form used to copy the mask to %eax.
9419define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9420; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9421; VLX:       # %bb.0: # %entry
9422; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9423; VLX-NEXT:    kmovd %k0, %eax
9424; VLX-NEXT:    vzeroupper
9425; VLX-NEXT:    retq
9426;
9427; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9428; NoVLX:       # %bb.0: # %entry
9429; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9430; NoVLX-NEXT:    kmovw %k0, %eax
9431; NoVLX-NEXT:    vzeroupper
9432; NoVLX-NEXT:    retq
9433entry:
9434  %0 = bitcast <8 x i64> %__a to <8 x i64>
9435  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9436  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9437  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9438  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9439  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9440  %4 = bitcast <32 x i1> %3 to i32
9441  ret i32 %4
9442}
9443
; Masked signed greater-than compare of <8 x i64> %__a against a single i64
; loaded from %__b and splatted to all eight lanes. The compare bits are
; ANDed with %__u (viewed as <8 x i1>), widened to 32 mask lanes and returned
; as an i32.
; Both runs expect a {1to8} zmm compare under {%k1}; they differ only in the
; kmov forms used.
9444define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9445; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9446; VLX:       # %bb.0: # %entry
9447; VLX-NEXT:    kmovd %edi, %k1
9448; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9449; VLX-NEXT:    kmovd %k0, %eax
9450; VLX-NEXT:    vzeroupper
9451; VLX-NEXT:    retq
9452;
9453; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9454; NoVLX:       # %bb.0: # %entry
9455; NoVLX-NEXT:    kmovw %edi, %k1
9456; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9457; NoVLX-NEXT:    kmovw %k0, %eax
9458; NoVLX-NEXT:    vzeroupper
9459; NoVLX-NEXT:    retq
9460entry:
9461  %0 = bitcast <8 x i64> %__a to <8 x i64>
9462  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9463  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9464  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9465  %2 = icmp sgt <8 x i64> %0, %1
9466  %3 = bitcast i8 %__u to <8 x i1>
9467  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
9468  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9469  %6 = bitcast <32 x i1> %5 to i32
9470  ret i32 %6
9471}
9472
9473
; Signed greater-than compare of <8 x i64> %__a and %__b. The 8 compare bits
; are widened to 64 mask lanes and returned as an i64.
; Both runs expect a single zmm vpcmpgtq into %k0; the +avx512vl run copies
; the mask out with kmovq to %rax, the avx512f-only run with kmovw to %eax.
9474define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9475; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9476; VLX:       # %bb.0: # %entry
9477; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9478; VLX-NEXT:    kmovq %k0, %rax
9479; VLX-NEXT:    vzeroupper
9480; VLX-NEXT:    retq
9481;
9482; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9483; NoVLX:       # %bb.0: # %entry
9484; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9485; NoVLX-NEXT:    kmovw %k0, %eax
9486; NoVLX-NEXT:    vzeroupper
9487; NoVLX-NEXT:    retq
9488entry:
9489  %0 = bitcast <8 x i64> %__a to <8 x i64>
9490  %1 = bitcast <8 x i64> %__b to <8 x i64>
9491  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9492  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9493  %4 = bitcast <64 x i1> %3 to i64
9494  ret i64 %4
9495}
9496
; Signed greater-than compare of <8 x i64> %__a against a <8 x i64> loaded
; from %__b. The 8 compare bits are widened to 64 mask lanes and returned as
; an i64.
; Both runs expect the load folded into a zmm vpcmpgtq; the +avx512vl run
; copies the mask out with kmovq to %rax, the avx512f-only run with kmovw
; to %eax.
9497define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9498; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9499; VLX:       # %bb.0: # %entry
9500; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9501; VLX-NEXT:    kmovq %k0, %rax
9502; VLX-NEXT:    vzeroupper
9503; VLX-NEXT:    retq
9504;
9505; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9506; NoVLX:       # %bb.0: # %entry
9507; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9508; NoVLX-NEXT:    kmovw %k0, %eax
9509; NoVLX-NEXT:    vzeroupper
9510; NoVLX-NEXT:    retq
9511entry:
9512  %0 = bitcast <8 x i64> %__a to <8 x i64>
9513  %load = load <8 x i64>, ptr %__b
9514  %1 = bitcast <8 x i64> %load to <8 x i64>
9515  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9516  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9517  %4 = bitcast <64 x i1> %3 to i64
9518  ret i64 %4
9519}
9520
; Masked signed greater-than compare of <8 x i64> %__a and %__b. The compare
; bits are ANDed with %__u (viewed as <8 x i1>), widened to 64 mask lanes and
; returned as an i64.
; Both runs expect the mask moved into %k1 and applied as a write-mask on a
; zmm vpcmpgtq; they differ only in the kmov forms used.
9521define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9522; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9523; VLX:       # %bb.0: # %entry
9524; VLX-NEXT:    kmovd %edi, %k1
9525; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9526; VLX-NEXT:    kmovq %k0, %rax
9527; VLX-NEXT:    vzeroupper
9528; VLX-NEXT:    retq
9529;
9530; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9531; NoVLX:       # %bb.0: # %entry
9532; NoVLX-NEXT:    kmovw %edi, %k1
9533; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9534; NoVLX-NEXT:    kmovw %k0, %eax
9535; NoVLX-NEXT:    vzeroupper
9536; NoVLX-NEXT:    retq
9537entry:
9538  %0 = bitcast <8 x i64> %__a to <8 x i64>
9539  %1 = bitcast <8 x i64> %__b to <8 x i64>
9540  %2 = icmp sgt <8 x i64> %0, %1
9541  %3 = bitcast i8 %__u to <8 x i1>
9542  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9543  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9544  %6 = bitcast <64 x i1> %5 to i64
9545  ret i64 %6
9546}
9547
; Masked signed greater-than compare of <8 x i64> %__a against a <8 x i64>
; loaded from %__b. The compare bits are ANDed with %__u (viewed as
; <8 x i1>), widened to 64 mask lanes and returned as an i64.
; Both runs expect the load folded into a zmm vpcmpgtq under {%k1}; they
; differ only in the kmov forms used.
9548define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9549; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9550; VLX:       # %bb.0: # %entry
9551; VLX-NEXT:    kmovd %edi, %k1
9552; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9553; VLX-NEXT:    kmovq %k0, %rax
9554; VLX-NEXT:    vzeroupper
9555; VLX-NEXT:    retq
9556;
9557; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9558; NoVLX:       # %bb.0: # %entry
9559; NoVLX-NEXT:    kmovw %edi, %k1
9560; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9561; NoVLX-NEXT:    kmovw %k0, %eax
9562; NoVLX-NEXT:    vzeroupper
9563; NoVLX-NEXT:    retq
9564entry:
9565  %0 = bitcast <8 x i64> %__a to <8 x i64>
9566  %load = load <8 x i64>, ptr %__b
9567  %1 = bitcast <8 x i64> %load to <8 x i64>
9568  %2 = icmp sgt <8 x i64> %0, %1
9569  %3 = bitcast i8 %__u to <8 x i1>
9570  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9571  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9572  %6 = bitcast <64 x i1> %5 to i64
9573  ret i64 %6
9574}
9575
9576
; Signed greater-than compare of <8 x i64> %__a against a single i64 loaded
; from %__b and splatted to all eight lanes. The 8 compare bits are widened
; to 64 mask lanes and returned as an i64.
; Both runs expect an embedded-broadcast zmm compare ({1to8}); the
; +avx512vl run copies the mask out with kmovq to %rax, the avx512f-only run
; with kmovw to %eax.
9577define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9578; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9579; VLX:       # %bb.0: # %entry
9580; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9581; VLX-NEXT:    kmovq %k0, %rax
9582; VLX-NEXT:    vzeroupper
9583; VLX-NEXT:    retq
9584;
9585; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9586; NoVLX:       # %bb.0: # %entry
9587; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9588; NoVLX-NEXT:    kmovw %k0, %eax
9589; NoVLX-NEXT:    vzeroupper
9590; NoVLX-NEXT:    retq
9591entry:
9592  %0 = bitcast <8 x i64> %__a to <8 x i64>
9593  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9594  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9595  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9596  %2 = icmp sgt <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9597  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9598  %4 = bitcast <64 x i1> %3 to i64
9599  ret i64 %4
9600}
9601
; Masked signed greater-than compare of <8 x i64> %__a against a single i64
; loaded from %__b and splatted to all eight lanes. The compare bits are
; ANDed with %__u (viewed as <8 x i1>), widened to 64 mask lanes and returned
; as an i64.
; Both runs expect a {1to8} zmm compare under {%k1}; they differ only in the
; kmov forms used.
9602define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9603; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9604; VLX:       # %bb.0: # %entry
9605; VLX-NEXT:    kmovd %edi, %k1
9606; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9607; VLX-NEXT:    kmovq %k0, %rax
9608; VLX-NEXT:    vzeroupper
9609; VLX-NEXT:    retq
9610;
9611; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9612; NoVLX:       # %bb.0: # %entry
9613; NoVLX-NEXT:    kmovw %edi, %k1
9614; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9615; NoVLX-NEXT:    kmovw %k0, %eax
9616; NoVLX-NEXT:    vzeroupper
9617; NoVLX-NEXT:    retq
9618entry:
9619  %0 = bitcast <8 x i64> %__a to <8 x i64>
9620  %load = load i64, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
9621  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9622  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9623  %2 = icmp sgt <8 x i64> %0, %1
9624  %3 = bitcast i8 %__u to <8 x i1>
9625  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 take the masked compare bits; indices 8-15 name lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are zero.
9626  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9627  %6 = bitcast <64 x i1> %5 to i64
9628  ret i64 %6
9629}
9630
9631
; Signed greater-or-equal compare of %__a and %__b viewed as <16 x i8>. The
; 16 compare bits are widened to 32 mask lanes and returned as an i32.
; The +avx512vl/+avx512bw run expects a single xmm vpcmpnltb into %k0.
; The avx512f-only run expects the compare formed as the inverse of b > a
; (vpcmpgtb with swapped operands, then vpternlogq $15), followed by
; vpmovsxbd + vptestmd to turn the byte vector into a 16-bit mask.
9632define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9633; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9634; VLX:       # %bb.0: # %entry
9635; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0
9636; VLX-NEXT:    kmovd %k0, %eax
9637; VLX-NEXT:    retq
9638;
9639; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9640; NoVLX:       # %bb.0: # %entry
9641; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9642; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9643; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9644; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9645; NoVLX-NEXT:    kmovw %k0, %eax
9646; NoVLX-NEXT:    vzeroupper
9647; NoVLX-NEXT:    retq
9648entry:
9649  %0 = bitcast <2 x i64> %__a to <16 x i8>
9650  %1 = bitcast <2 x i64> %__b to <16 x i8>
9651  %2 = icmp sge <16 x i8> %0, %1
  ; Indices 0-15 take the compare bits; indices 16-31 name lanes of the
  ; zeroinitializer operand, so result lanes 16-31 are zero.
9652  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9653  %4 = bitcast <32 x i1> %3 to i32
9654  ret i32 %4
9655}
9656
; Signed greater-or-equal compare of %__a against a <2 x i64> loaded from
; %__b, both viewed as <16 x i8>. The 16 compare bits are widened to 32 mask
; lanes and returned as an i32.
; The +avx512vl/+avx512bw run expects the load folded into an xmm vpcmpnltb.
; The avx512f-only run expects a separate vmovdqa load, the compare formed as
; the inverse of b > a (vpcmpgtb with swapped operands, then
; vpternlogq $15), and vpmovsxbd + vptestmd to produce the 16-bit mask.
9657define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
9658; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9659; VLX:       # %bb.0: # %entry
9660; VLX-NEXT:    vpcmpnltb (%rdi), %xmm0, %k0
9661; VLX-NEXT:    kmovd %k0, %eax
9662; VLX-NEXT:    retq
9663;
9664; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9665; NoVLX:       # %bb.0: # %entry
9666; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
9667; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9668; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9669; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9670; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9671; NoVLX-NEXT:    kmovw %k0, %eax
9672; NoVLX-NEXT:    vzeroupper
9673; NoVLX-NEXT:    retq
9674entry:
9675  %0 = bitcast <2 x i64> %__a to <16 x i8>
9676  %load = load <2 x i64>, ptr %__b
9677  %1 = bitcast <2 x i64> %load to <16 x i8>
9678  %2 = icmp sge <16 x i8> %0, %1
  ; Indices 0-15 take the compare bits; indices 16-31 name lanes of the
  ; zeroinitializer operand, so result lanes 16-31 are zero.
9679  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9680  %4 = bitcast <32 x i1> %3 to i32
9681  ret i32 %4
9682}
9683
; Masked signed greater-or-equal compare of %__a and %__b viewed as
; <16 x i8>. The compare bits are ANDed with %__u (viewed as <16 x i1>),
; widened to 32 mask lanes and returned as an i32.
; The +avx512vl/+avx512bw run expects an xmm vpcmpnltb under {%k1}.
; The avx512f-only run expects the compare formed as the inverse of b > a
; (vpcmpgtb with swapped operands, then vpternlogq $15), vpmovsxbd + vptestmd
; to produce the 16-bit mask, and the %__u mask applied afterwards in a
; general-purpose register with andl %edi, %eax.
9684define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9685; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9686; VLX:       # %bb.0: # %entry
9687; VLX-NEXT:    kmovd %edi, %k1
9688; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9689; VLX-NEXT:    kmovd %k0, %eax
9690; VLX-NEXT:    retq
9691;
9692; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9693; NoVLX:       # %bb.0: # %entry
9694; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9695; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9696; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9697; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9698; NoVLX-NEXT:    kmovw %k0, %eax
9699; NoVLX-NEXT:    andl %edi, %eax
9700; NoVLX-NEXT:    vzeroupper
9701; NoVLX-NEXT:    retq
9702entry:
9703  %0 = bitcast <2 x i64> %__a to <16 x i8>
9704  %1 = bitcast <2 x i64> %__b to <16 x i8>
9705  %2 = icmp sge <16 x i8> %0, %1
9706  %3 = bitcast i16 %__u to <16 x i1>
9707  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 take the masked compare bits; indices 16-31 name lanes of
  ; the zeroinitializer operand, so result lanes 16-31 are zero.
9708  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9709  %6 = bitcast <32 x i1> %5 to i32
9710  ret i32 %6
9711}
9712
; Masked signed greater-or-equal compare of %__a against a <2 x i64> loaded
; from %__b, both viewed as <16 x i8>. The compare bits are ANDed with %__u
; (viewed as <16 x i1>), widened to 32 mask lanes and returned as an i32.
; The +avx512vl/+avx512bw run expects the load folded into an xmm vpcmpnltb
; under {%k1}.
; The avx512f-only run expects a separate vmovdqa load, the compare formed as
; the inverse of b > a (vpcmpgtb with swapped operands, then
; vpternlogq $15), vpmovsxbd + vptestmd to produce the 16-bit mask, and the
; %__u mask applied afterwards with andl %edi, %eax.
9713define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
9714; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9715; VLX:       # %bb.0: # %entry
9716; VLX-NEXT:    kmovd %edi, %k1
9717; VLX-NEXT:    vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9718; VLX-NEXT:    kmovd %k0, %eax
9719; VLX-NEXT:    retq
9720;
9721; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9722; NoVLX:       # %bb.0: # %entry
9723; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
9724; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9725; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9726; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9727; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9728; NoVLX-NEXT:    kmovw %k0, %eax
9729; NoVLX-NEXT:    andl %edi, %eax
9730; NoVLX-NEXT:    vzeroupper
9731; NoVLX-NEXT:    retq
9732entry:
9733  %0 = bitcast <2 x i64> %__a to <16 x i8>
9734  %load = load <2 x i64>, ptr %__b
9735  %1 = bitcast <2 x i64> %load to <16 x i8>
9736  %2 = icmp sge <16 x i8> %0, %1
9737  %3 = bitcast i16 %__u to <16 x i1>
9738  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 take the masked compare bits; indices 16-31 name lanes of
  ; the zeroinitializer operand, so result lanes 16-31 are zero.
9739  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9740  %6 = bitcast <32 x i1> %5 to i32
9741  ret i32 %6
9742}
9743
9744
; Unmasked signed >= compare of two <16 x i8> vectors, returned as a 64-bit mask.
; Only shuffle indices 0-15 select compare lanes; every index >= 16 (including
; the repeated 16-31 runs) selects a lane of the zeroinitializer operand, so
; result bits 16-63 are zero.
; With AVX512VL+BW the expected code is vpcmpnltb + kmovq. With only AVX512F,
; a >= b is computed as NOT(b > a) via vpcmpgtb and vpternlogq $15, then
; converted to a mask with vpmovsxbd/vptestmd.
9745define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9746; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9747; VLX:       # %bb.0: # %entry
9748; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0
9749; VLX-NEXT:    kmovq %k0, %rax
9750; VLX-NEXT:    retq
9751;
9752; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9753; NoVLX:       # %bb.0: # %entry
9754; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9755; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9756; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9757; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9758; NoVLX-NEXT:    kmovw %k0, %eax
9759; NoVLX-NEXT:    vzeroupper
9760; NoVLX-NEXT:    retq
9761entry:
9762  %0 = bitcast <2 x i64> %__a to <16 x i8>
9763  %1 = bitcast <2 x i64> %__b to <16 x i8>
9764  %2 = icmp sge <16 x i8> %0, %1
9765  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9766  %4 = bitcast <64 x i1> %3 to i64
9767  ret i64 %4
9768}
9769
; Memory-operand variant of the unmasked <16 x i8> signed >= compare producing
; a 64-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so
; result bits 16-63 are zero.
; With AVX512VL+BW the load is folded into vpcmpnltb. With only AVX512F the
; operand is loaded by vmovdqa and a >= b is computed as NOT(b > a).
9770define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
9771; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9772; VLX:       # %bb.0: # %entry
9773; VLX-NEXT:    vpcmpnltb (%rdi), %xmm0, %k0
9774; VLX-NEXT:    kmovq %k0, %rax
9775; VLX-NEXT:    retq
9776;
9777; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9778; NoVLX:       # %bb.0: # %entry
9779; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
9780; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9781; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9782; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9783; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9784; NoVLX-NEXT:    kmovw %k0, %eax
9785; NoVLX-NEXT:    vzeroupper
9786; NoVLX-NEXT:    retq
9787entry:
9788  %0 = bitcast <2 x i64> %__a to <16 x i8>
9789  %load = load <2 x i64>, ptr %__b
9790  %1 = bitcast <2 x i64> %load to <16 x i8>
9791  %2 = icmp sge <16 x i8> %0, %1
9792  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9793  %4 = bitcast <64 x i1> %3 to i64
9794  ret i64 %4
9795}
9796
; Masked signed >= compare of two <16 x i8> vectors, returned as a 64-bit mask.
; The compare result is ANDed with the 16 bits of %__u. Every shuffle index
; >= 16 selects a lane of the zeroinitializer operand, so result bits 16-63 are
; zero.
; With AVX512VL+BW the expected code is a masked vpcmpnltb + kmovq. With only
; AVX512F, a >= b is computed as NOT(b > a) and %__u is applied with andl.
9797define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9798; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9799; VLX:       # %bb.0: # %entry
9800; VLX-NEXT:    kmovd %edi, %k1
9801; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9802; VLX-NEXT:    kmovq %k0, %rax
9803; VLX-NEXT:    retq
9804;
9805; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9806; NoVLX:       # %bb.0: # %entry
9807; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9808; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9809; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9810; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9811; NoVLX-NEXT:    kmovw %k0, %eax
9812; NoVLX-NEXT:    andl %edi, %eax
9813; NoVLX-NEXT:    vzeroupper
9814; NoVLX-NEXT:    retq
9815entry:
9816  %0 = bitcast <2 x i64> %__a to <16 x i8>
9817  %1 = bitcast <2 x i64> %__b to <16 x i8>
9818  %2 = icmp sge <16 x i8> %0, %1
9819  %3 = bitcast i16 %__u to <16 x i1>
9820  %4 = and <16 x i1> %2, %3
9821  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9822  %6 = bitcast <64 x i1> %5 to i64
9823  ret i64 %6
9824}
9825
; Memory-operand variant of the masked <16 x i8> signed >= compare producing a
; 64-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; The compare result is ANDed with %__u; every shuffle index >= 16 selects a
; lane of the zeroinitializer operand, so result bits 16-63 are zero.
; With AVX512VL+BW the load is folded into the masked vpcmpnltb. With only
; AVX512F the operand is loaded by vmovdqa, a >= b is computed as NOT(b > a),
; and the mask is applied with andl.
9826define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
9827; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9828; VLX:       # %bb.0: # %entry
9829; VLX-NEXT:    kmovd %edi, %k1
9830; VLX-NEXT:    vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9831; VLX-NEXT:    kmovq %k0, %rax
9832; VLX-NEXT:    retq
9833;
9834; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9835; NoVLX:       # %bb.0: # %entry
9836; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
9837; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9838; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9839; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9840; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9841; NoVLX-NEXT:    kmovw %k0, %eax
9842; NoVLX-NEXT:    andl %edi, %eax
9843; NoVLX-NEXT:    vzeroupper
9844; NoVLX-NEXT:    retq
9845entry:
9846  %0 = bitcast <2 x i64> %__a to <16 x i8>
9847  %load = load <2 x i64>, ptr %__b
9848  %1 = bitcast <2 x i64> %load to <16 x i8>
9849  %2 = icmp sge <16 x i8> %0, %1
9850  %3 = bitcast i16 %__u to <16 x i1>
9851  %4 = and <16 x i1> %2, %3
9852  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9853  %6 = bitcast <64 x i1> %5 to i64
9854  ret i64 %6
9855}
9856
9857
; Unmasked signed >= compare of two <32 x i8> vectors (bitcast from <4 x i64>),
; returned as a 64-bit mask. Shuffle indices 32-63 select lanes of the
; zeroinitializer operand, so the upper 32 result bits are zero.
; With AVX512VL+BW the expected code is one 256-bit vpcmpnltb. With only
; AVX512F, a >= b is computed as NOT(b > a); the 256-bit result is then split
; into two 128-bit halves, each converted to a 16-bit mask, and the halves are
; recombined with shll $16 / orl.
9858define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9859; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9860; VLX:       # %bb.0: # %entry
9861; VLX-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0
9862; VLX-NEXT:    kmovq %k0, %rax
9863; VLX-NEXT:    vzeroupper
9864; VLX-NEXT:    retq
9865;
9866; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9867; NoVLX:       # %bb.0: # %entry
9868; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
9869; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9870; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
9871; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
9872; NoVLX-NEXT:    kmovw %k0, %ecx
9873; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
9874; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9875; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9876; NoVLX-NEXT:    kmovw %k0, %eax
9877; NoVLX-NEXT:    shll $16, %eax
9878; NoVLX-NEXT:    orl %ecx, %eax
9879; NoVLX-NEXT:    vzeroupper
9880; NoVLX-NEXT:    retq
9881entry:
9882  %0 = bitcast <4 x i64> %__a to <32 x i8>
9883  %1 = bitcast <4 x i64> %__b to <32 x i8>
9884  %2 = icmp sge <32 x i8> %0, %1
9885  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9886  %4 = bitcast <64 x i1> %3 to i64
9887  ret i64 %4
9888}
9889
; Memory-operand variant of the unmasked <32 x i8> signed >= compare producing
; a 64-bit mask: the right-hand side is loaded from %__b as <4 x i64>.
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result bits are zero.
; With AVX512VL+BW the load is folded into vpcmpnltb. With only AVX512F the
; operand is loaded by vmovdqa, a >= b is computed as NOT(b > a), and the two
; 128-bit halves are converted to 16-bit masks and merged with shll $16 / orl.
9890define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
9891; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9892; VLX:       # %bb.0: # %entry
9893; VLX-NEXT:    vpcmpnltb (%rdi), %ymm0, %k0
9894; VLX-NEXT:    kmovq %k0, %rax
9895; VLX-NEXT:    vzeroupper
9896; VLX-NEXT:    retq
9897;
9898; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9899; NoVLX:       # %bb.0: # %entry
9900; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
9901; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
9902; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9903; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
9904; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
9905; NoVLX-NEXT:    kmovw %k0, %ecx
9906; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
9907; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9908; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9909; NoVLX-NEXT:    kmovw %k0, %eax
9910; NoVLX-NEXT:    shll $16, %eax
9911; NoVLX-NEXT:    orl %ecx, %eax
9912; NoVLX-NEXT:    vzeroupper
9913; NoVLX-NEXT:    retq
9914entry:
9915  %0 = bitcast <4 x i64> %__a to <32 x i8>
9916  %load = load <4 x i64>, ptr %__b
9917  %1 = bitcast <4 x i64> %load to <32 x i8>
9918  %2 = icmp sge <32 x i8> %0, %1
9919  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9920  %4 = bitcast <64 x i1> %3 to i64
9921  ret i64 %4
9922}
9923
; Masked signed >= compare of two <32 x i8> vectors, returned as a 64-bit mask.
; The <32 x i1> compare result is ANDed with the 32 bits of %__u. Shuffle
; indices 32-63 select lanes of the zeroinitializer operand, so the upper 32
; result bits are zero.
; With AVX512VL+BW the expected code is one masked 256-bit vpcmpnltb. With only
; AVX512F, a >= b is computed as NOT(b > a); each 128-bit half becomes a 16-bit
; mask that is ANDed with the matching half of %edi (low 16 bits, then %edi
; shifted right by 16) before the halves are merged.
9924define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9925; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9926; VLX:       # %bb.0: # %entry
9927; VLX-NEXT:    kmovd %edi, %k1
9928; VLX-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
9929; VLX-NEXT:    kmovq %k0, %rax
9930; VLX-NEXT:    vzeroupper
9931; VLX-NEXT:    retq
9932;
9933; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9934; NoVLX:       # %bb.0: # %entry
9935; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
9936; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9937; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
9938; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
9939; NoVLX-NEXT:    kmovw %k0, %eax
9940; NoVLX-NEXT:    andl %edi, %eax
9941; NoVLX-NEXT:    shrl $16, %edi
9942; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
9943; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9944; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9945; NoVLX-NEXT:    kmovw %k0, %ecx
9946; NoVLX-NEXT:    andl %edi, %ecx
9947; NoVLX-NEXT:    shll $16, %ecx
9948; NoVLX-NEXT:    movzwl %ax, %eax
9949; NoVLX-NEXT:    orl %ecx, %eax
9950; NoVLX-NEXT:    vzeroupper
9951; NoVLX-NEXT:    retq
9952entry:
9953  %0 = bitcast <4 x i64> %__a to <32 x i8>
9954  %1 = bitcast <4 x i64> %__b to <32 x i8>
9955  %2 = icmp sge <32 x i8> %0, %1
9956  %3 = bitcast i32 %__u to <32 x i1>
9957  %4 = and <32 x i1> %2, %3
9958  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9959  %6 = bitcast <64 x i1> %5 to i64
9960  ret i64 %6
9961}
9962
; Memory-operand variant of the masked <32 x i8> signed >= compare producing a
; 64-bit mask: the right-hand side is loaded from %__b as <4 x i64>.
; The compare result is ANDed with the 32 bits of %__u; shuffle indices 32-63
; select lanes of the zeroinitializer operand, so the upper 32 result bits are
; zero.
; With AVX512VL+BW the load is folded into the masked vpcmpnltb. With only
; AVX512F the operand is loaded by vmovdqa, a >= b is computed as NOT(b > a),
; and each 128-bit half is masked with its 16 bits of %edi before merging.
9963define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9964; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9965; VLX:       # %bb.0: # %entry
9966; VLX-NEXT:    kmovd %edi, %k1
9967; VLX-NEXT:    vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
9968; VLX-NEXT:    kmovq %k0, %rax
9969; VLX-NEXT:    vzeroupper
9970; VLX-NEXT:    retq
9971;
9972; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9973; NoVLX:       # %bb.0: # %entry
9974; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
9975; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
9976; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9977; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
9978; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
9979; NoVLX-NEXT:    kmovw %k0, %eax
9980; NoVLX-NEXT:    andl %edi, %eax
9981; NoVLX-NEXT:    shrl $16, %edi
9982; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
9983; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9984; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9985; NoVLX-NEXT:    kmovw %k0, %ecx
9986; NoVLX-NEXT:    andl %edi, %ecx
9987; NoVLX-NEXT:    shll $16, %ecx
9988; NoVLX-NEXT:    movzwl %ax, %eax
9989; NoVLX-NEXT:    orl %ecx, %eax
9990; NoVLX-NEXT:    vzeroupper
9991; NoVLX-NEXT:    retq
9992entry:
9993  %0 = bitcast <4 x i64> %__a to <32 x i8>
9994  %load = load <4 x i64>, ptr %__b
9995  %1 = bitcast <4 x i64> %load to <32 x i8>
9996  %2 = icmp sge <32 x i8> %0, %1
9997  %3 = bitcast i32 %__u to <32 x i1>
9998  %4 = and <32 x i1> %2, %3
9999  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10000  %6 = bitcast <64 x i1> %5 to i64
10001  ret i64 %6
10002}
10003
10004
; Unmasked signed >= compare of two <8 x i16> vectors (bitcast from <2 x i64>),
; returned as a 16-bit mask. Shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so the upper 8 result bits are zero.
; With AVX512VL+BW the expected code is a single vpcmpnltw. With only AVX512F,
; a >= b is computed as NOT(b > a) via vpcmpgtw and vpternlogq $15, then
; converted to a mask with vpmovsxwq/vptestmq.
10005define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10006; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10007; VLX:       # %bb.0: # %entry
10008; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10009; VLX-NEXT:    kmovd %k0, %eax
10010; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10011; VLX-NEXT:    retq
10012;
10013; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10014; NoVLX:       # %bb.0: # %entry
10015; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10016; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10017; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10018; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10019; NoVLX-NEXT:    kmovw %k0, %eax
10020; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10021; NoVLX-NEXT:    vzeroupper
10022; NoVLX-NEXT:    retq
10023entry:
10024  %0 = bitcast <2 x i64> %__a to <8 x i16>
10025  %1 = bitcast <2 x i64> %__b to <8 x i16>
10026  %2 = icmp sge <8 x i16> %0, %1
10027  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10028  %4 = bitcast <16 x i1> %3 to i16
10029  ret i16 %4
10030}
10031
; Memory-operand variant of the unmasked <8 x i16> signed >= compare producing
; a 16-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 result bits are zero.
; With AVX512VL+BW the load is folded into vpcmpnltw. With only AVX512F the
; operand is loaded by vmovdqa and a >= b is computed as NOT(b > a).
10032define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10033; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10034; VLX:       # %bb.0: # %entry
10035; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10036; VLX-NEXT:    kmovd %k0, %eax
10037; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10038; VLX-NEXT:    retq
10039;
10040; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10041; NoVLX:       # %bb.0: # %entry
10042; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10043; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10044; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10045; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10046; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10047; NoVLX-NEXT:    kmovw %k0, %eax
10048; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10049; NoVLX-NEXT:    vzeroupper
10050; NoVLX-NEXT:    retq
10051entry:
10052  %0 = bitcast <2 x i64> %__a to <8 x i16>
10053  %load = load <2 x i64>, ptr %__b
10054  %1 = bitcast <2 x i64> %load to <8 x i16>
10055  %2 = icmp sge <8 x i16> %0, %1
10056  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10057  %4 = bitcast <16 x i1> %3 to i16
10058  ret i16 %4
10059}
10060
; Masked signed >= compare of two <8 x i16> vectors, returned as a 16-bit mask.
; The <8 x i1> compare result is ANDed with the 8 bits of %__u. Shuffle indices
; 8-15 select lanes of the zeroinitializer operand, so the upper 8 result bits
; are zero.
; With AVX512VL+BW the expected code is a masked vpcmpnltw. With only AVX512F,
; a >= b is computed as NOT(b > a) and %__u is applied as the {%k1} write mask
; of vptestmq.
10061define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10062; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10063; VLX:       # %bb.0: # %entry
10064; VLX-NEXT:    kmovd %edi, %k1
10065; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10066; VLX-NEXT:    kmovd %k0, %eax
10067; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10068; VLX-NEXT:    retq
10069;
10070; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10071; NoVLX:       # %bb.0: # %entry
10072; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10073; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10074; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10075; NoVLX-NEXT:    kmovw %edi, %k1
10076; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10077; NoVLX-NEXT:    kmovw %k0, %eax
10078; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10079; NoVLX-NEXT:    vzeroupper
10080; NoVLX-NEXT:    retq
10081entry:
10082  %0 = bitcast <2 x i64> %__a to <8 x i16>
10083  %1 = bitcast <2 x i64> %__b to <8 x i16>
10084  %2 = icmp sge <8 x i16> %0, %1
10085  %3 = bitcast i8 %__u to <8 x i1>
10086  %4 = and <8 x i1> %2, %3
10087  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10088  %6 = bitcast <16 x i1> %5 to i16
10089  ret i16 %6
10090}
10091
; Memory-operand variant of the masked <8 x i16> signed >= compare producing a
; 16-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; The compare result is ANDed with the 8 bits of %__u; shuffle indices 8-15
; select lanes of the zeroinitializer operand, so the upper 8 result bits are
; zero.
; With AVX512VL+BW the load is folded into the masked vpcmpnltw. With only
; AVX512F the operand is loaded by vmovdqa, a >= b is computed as NOT(b > a),
; and %__u is applied as the {%k1} write mask of vptestmq.
10092define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10093; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10094; VLX:       # %bb.0: # %entry
10095; VLX-NEXT:    kmovd %edi, %k1
10096; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10097; VLX-NEXT:    kmovd %k0, %eax
10098; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10099; VLX-NEXT:    retq
10100;
10101; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10102; NoVLX:       # %bb.0: # %entry
10103; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10104; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10105; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10106; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10107; NoVLX-NEXT:    kmovw %edi, %k1
10108; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10109; NoVLX-NEXT:    kmovw %k0, %eax
10110; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10111; NoVLX-NEXT:    vzeroupper
10112; NoVLX-NEXT:    retq
10113entry:
10114  %0 = bitcast <2 x i64> %__a to <8 x i16>
10115  %load = load <2 x i64>, ptr %__b
10116  %1 = bitcast <2 x i64> %load to <8 x i16>
10117  %2 = icmp sge <8 x i16> %0, %1
10118  %3 = bitcast i8 %__u to <8 x i1>
10119  %4 = and <8 x i1> %2, %3
10120  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10121  %6 = bitcast <16 x i1> %5 to i16
10122  ret i16 %6
10123}
10124
10125
; Unmasked signed >= compare of two <8 x i16> vectors, returned as a 32-bit mask.
; Only shuffle indices 0-7 select compare lanes; every index >= 8 (including
; the repeated 8-15 runs) selects a lane of the zeroinitializer operand, so
; result bits 8-31 are zero.
; With AVX512VL+BW the expected code is a single vpcmpnltw. With only AVX512F,
; a >= b is computed as NOT(b > a) via vpcmpgtw and vpternlogq $15.
10126define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10127; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10128; VLX:       # %bb.0: # %entry
10129; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10130; VLX-NEXT:    kmovd %k0, %eax
10131; VLX-NEXT:    retq
10132;
10133; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10134; NoVLX:       # %bb.0: # %entry
10135; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10136; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10137; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10138; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10139; NoVLX-NEXT:    kmovw %k0, %eax
10140; NoVLX-NEXT:    vzeroupper
10141; NoVLX-NEXT:    retq
10142entry:
10143  %0 = bitcast <2 x i64> %__a to <8 x i16>
10144  %1 = bitcast <2 x i64> %__b to <8 x i16>
10145  %2 = icmp sge <8 x i16> %0, %1
10146  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10147  %4 = bitcast <32 x i1> %3 to i32
10148  ret i32 %4
10149}
10150
; Memory-operand variant of the unmasked <8 x i16> signed >= compare producing
; a 32-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result bits 8-31 are zero.
; With AVX512VL+BW the load is folded into vpcmpnltw. With only AVX512F the
; operand is loaded by vmovdqa and a >= b is computed as NOT(b > a).
10151define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10152; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10153; VLX:       # %bb.0: # %entry
10154; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10155; VLX-NEXT:    kmovd %k0, %eax
10156; VLX-NEXT:    retq
10157;
10158; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10159; NoVLX:       # %bb.0: # %entry
10160; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10161; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10162; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10163; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10164; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10165; NoVLX-NEXT:    kmovw %k0, %eax
10166; NoVLX-NEXT:    vzeroupper
10167; NoVLX-NEXT:    retq
10168entry:
10169  %0 = bitcast <2 x i64> %__a to <8 x i16>
10170  %load = load <2 x i64>, ptr %__b
10171  %1 = bitcast <2 x i64> %load to <8 x i16>
10172  %2 = icmp sge <8 x i16> %0, %1
10173  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10174  %4 = bitcast <32 x i1> %3 to i32
10175  ret i32 %4
10176}
10177
; Masked signed >= compare of two <8 x i16> vectors, returned as a 32-bit mask.
; The compare result is ANDed with the 8 bits of %__u. Every shuffle index >= 8
; selects a lane of the zeroinitializer operand, so result bits 8-31 are zero.
; With AVX512VL+BW the expected code is a masked vpcmpnltw. With only AVX512F,
; a >= b is computed as NOT(b > a) and %__u is applied as the {%k1} write mask
; of vptestmq.
10178define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10179; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10180; VLX:       # %bb.0: # %entry
10181; VLX-NEXT:    kmovd %edi, %k1
10182; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10183; VLX-NEXT:    kmovd %k0, %eax
10184; VLX-NEXT:    retq
10185;
10186; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10187; NoVLX:       # %bb.0: # %entry
10188; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10189; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10190; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10191; NoVLX-NEXT:    kmovw %edi, %k1
10192; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10193; NoVLX-NEXT:    kmovw %k0, %eax
10194; NoVLX-NEXT:    vzeroupper
10195; NoVLX-NEXT:    retq
10196entry:
10197  %0 = bitcast <2 x i64> %__a to <8 x i16>
10198  %1 = bitcast <2 x i64> %__b to <8 x i16>
10199  %2 = icmp sge <8 x i16> %0, %1
10200  %3 = bitcast i8 %__u to <8 x i1>
10201  %4 = and <8 x i1> %2, %3
10202  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10203  %6 = bitcast <32 x i1> %5 to i32
10204  ret i32 %6
10205}
10206
; Memory-operand variant of the masked <8 x i16> signed >= compare producing a
; 32-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; The compare result is ANDed with the 8 bits of %__u; every shuffle index >= 8
; selects a lane of the zeroinitializer operand, so result bits 8-31 are zero.
; With AVX512VL+BW the load is folded into the masked vpcmpnltw. With only
; AVX512F the operand is loaded by vmovdqa, a >= b is computed as NOT(b > a),
; and %__u is applied as the {%k1} write mask of vptestmq.
10207define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10208; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10209; VLX:       # %bb.0: # %entry
10210; VLX-NEXT:    kmovd %edi, %k1
10211; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10212; VLX-NEXT:    kmovd %k0, %eax
10213; VLX-NEXT:    retq
10214;
10215; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10216; NoVLX:       # %bb.0: # %entry
10217; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10218; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10219; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10220; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10221; NoVLX-NEXT:    kmovw %edi, %k1
10222; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10223; NoVLX-NEXT:    kmovw %k0, %eax
10224; NoVLX-NEXT:    vzeroupper
10225; NoVLX-NEXT:    retq
10226entry:
10227  %0 = bitcast <2 x i64> %__a to <8 x i16>
10228  %load = load <2 x i64>, ptr %__b
10229  %1 = bitcast <2 x i64> %load to <8 x i16>
10230  %2 = icmp sge <8 x i16> %0, %1
10231  %3 = bitcast i8 %__u to <8 x i1>
10232  %4 = and <8 x i1> %2, %3
10233  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10234  %6 = bitcast <32 x i1> %5 to i32
10235  ret i32 %6
10236}
10237
10238
; Unmasked signed >= compare of two <8 x i16> vectors, returned as a 64-bit mask.
; Only shuffle indices 0-7 select compare lanes; every index >= 8 (including
; the repeated 8-15 runs) selects a lane of the zeroinitializer operand, so
; result bits 8-63 are zero.
; With AVX512VL+BW the expected code is vpcmpnltw + kmovq. With only AVX512F,
; a >= b is computed as NOT(b > a) via vpcmpgtw and vpternlogq $15.
10239define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10240; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10241; VLX:       # %bb.0: # %entry
10242; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10243; VLX-NEXT:    kmovq %k0, %rax
10244; VLX-NEXT:    retq
10245;
10246; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10247; NoVLX:       # %bb.0: # %entry
10248; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10249; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10250; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10251; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10252; NoVLX-NEXT:    kmovw %k0, %eax
10253; NoVLX-NEXT:    vzeroupper
10254; NoVLX-NEXT:    retq
10255entry:
10256  %0 = bitcast <2 x i64> %__a to <8 x i16>
10257  %1 = bitcast <2 x i64> %__b to <8 x i16>
10258  %2 = icmp sge <8 x i16> %0, %1
10259  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10260  %4 = bitcast <64 x i1> %3 to i64
10261  ret i64 %4
10262}
10263
; Memory-operand variant of the unmasked <8 x i16> signed >= compare producing
; a 64-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; result bits 8-63 are zero.
; With AVX512VL+BW the load is folded into vpcmpnltw. With only AVX512F the
; operand is loaded by vmovdqa and a >= b is computed as NOT(b > a).
10264define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10265; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10266; VLX:       # %bb.0: # %entry
10267; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10268; VLX-NEXT:    kmovq %k0, %rax
10269; VLX-NEXT:    retq
10270;
10271; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10272; NoVLX:       # %bb.0: # %entry
10273; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10274; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10275; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10276; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10277; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10278; NoVLX-NEXT:    kmovw %k0, %eax
10279; NoVLX-NEXT:    vzeroupper
10280; NoVLX-NEXT:    retq
10281entry:
10282  %0 = bitcast <2 x i64> %__a to <8 x i16>
10283  %load = load <2 x i64>, ptr %__b
10284  %1 = bitcast <2 x i64> %load to <8 x i16>
10285  %2 = icmp sge <8 x i16> %0, %1
10286  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10287  %4 = bitcast <64 x i1> %3 to i64
10288  ret i64 %4
10289}
10290
; Masked signed >= compare of two <8 x i16> vectors, returned as a 64-bit mask.
; The compare result is ANDed with the 8 bits of %__u. Every shuffle index >= 8
; selects a lane of the zeroinitializer operand, so result bits 8-63 are zero.
; With AVX512VL+BW the expected code is a masked vpcmpnltw + kmovq. With only
; AVX512F, a >= b is computed as NOT(b > a) and %__u is applied as the {%k1}
; write mask of vptestmq.
10291define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10292; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10293; VLX:       # %bb.0: # %entry
10294; VLX-NEXT:    kmovd %edi, %k1
10295; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10296; VLX-NEXT:    kmovq %k0, %rax
10297; VLX-NEXT:    retq
10298;
10299; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10300; NoVLX:       # %bb.0: # %entry
10301; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10302; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10303; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10304; NoVLX-NEXT:    kmovw %edi, %k1
10305; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10306; NoVLX-NEXT:    kmovw %k0, %eax
10307; NoVLX-NEXT:    vzeroupper
10308; NoVLX-NEXT:    retq
10309entry:
10310  %0 = bitcast <2 x i64> %__a to <8 x i16>
10311  %1 = bitcast <2 x i64> %__b to <8 x i16>
10312  %2 = icmp sge <8 x i16> %0, %1
10313  %3 = bitcast i8 %__u to <8 x i1>
10314  %4 = and <8 x i1> %2, %3
10315  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10316  %6 = bitcast <64 x i1> %5 to i64
10317  ret i64 %6
10318}
10319
; Memory-operand variant of the masked <8 x i16> signed >= compare producing a
; 64-bit mask: the right-hand side is loaded from %__b as <2 x i64>.
; The compare result is ANDed with the 8 bits of %__u; every shuffle index >= 8
; selects a lane of the zeroinitializer operand, so result bits 8-63 are zero.
; With AVX512VL+BW the load is folded into the masked vpcmpnltw. With only
; AVX512F the operand is loaded by vmovdqa, a >= b is computed as NOT(b > a),
; and %__u is applied as the {%k1} write mask of vptestmq.
10320define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10321; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10322; VLX:       # %bb.0: # %entry
10323; VLX-NEXT:    kmovd %edi, %k1
10324; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10325; VLX-NEXT:    kmovq %k0, %rax
10326; VLX-NEXT:    retq
10327;
10328; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10329; NoVLX:       # %bb.0: # %entry
10330; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10331; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10332; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10333; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10334; NoVLX-NEXT:    kmovw %edi, %k1
10335; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10336; NoVLX-NEXT:    kmovw %k0, %eax
10337; NoVLX-NEXT:    vzeroupper
10338; NoVLX-NEXT:    retq
10339entry:
10340  %0 = bitcast <2 x i64> %__a to <8 x i16>
10341  %load = load <2 x i64>, ptr %__b
10342  %1 = bitcast <2 x i64> %load to <8 x i16>
10343  %2 = icmp sge <8 x i16> %0, %1
10344  %3 = bitcast i8 %__u to <8 x i1>
10345  %4 = and <8 x i1> %2, %3
10346  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10347  %6 = bitcast <64 x i1> %5 to i64
10348  ret i64 %6
10349}
10350
10351
; Unmasked register variant: signed >= on <16 x i16> lanes of two 256-bit
; arguments. The <16 x i1> result is zero-padded to 32 mask bits and returned
; as i32.
; The VLX checks expect a single vpcmpnltw on ymm registers; the NoVLX checks
; expect vpcmpgtw with swapped operands, vpternlogq $15, vpmovsxwd and vptestmd.
10352define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10353; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10354; VLX:       # %bb.0: # %entry
10355; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0
10356; VLX-NEXT:    kmovd %k0, %eax
10357; VLX-NEXT:    vzeroupper
10358; VLX-NEXT:    retq
10359;
10360; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10361; NoVLX:       # %bb.0: # %entry
10362; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10363; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10364; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10365; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10366; NoVLX-NEXT:    kmovw %k0, %eax
10367; NoVLX-NEXT:    vzeroupper
10368; NoVLX-NEXT:    retq
10369entry:
10370  %0 = bitcast <4 x i64> %__a to <16 x i16>
10371  %1 = bitcast <4 x i64> %__b to <16 x i16>
10372  %2 = icmp sge <16 x i16> %0, %1
  ; Mask indices 16..31 select from the zeroinitializer operand, so the upper
  ; 16 result elements are zero.
10373  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10374  %4 = bitcast <32 x i1> %3 to i32
10375  ret i32 %4
10376}
10377
; Unmasked memory-operand variant: signed >= on <16 x i16> lanes, with the
; right operand loaded from %__b. The <16 x i1> result is zero-padded to 32
; mask bits and returned as i32.
; The VLX checks expect vpcmpnltw reading (%rdi) directly; the NoVLX checks
; expect a separate vmovdqa load before the vpcmpgtw/vpternlogq sequence.
10378define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
10379; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10380; VLX:       # %bb.0: # %entry
10381; VLX-NEXT:    vpcmpnltw (%rdi), %ymm0, %k0
10382; VLX-NEXT:    kmovd %k0, %eax
10383; VLX-NEXT:    vzeroupper
10384; VLX-NEXT:    retq
10385;
10386; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10387; NoVLX:       # %bb.0: # %entry
10388; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10389; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10390; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10391; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10392; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10393; NoVLX-NEXT:    kmovw %k0, %eax
10394; NoVLX-NEXT:    vzeroupper
10395; NoVLX-NEXT:    retq
10396entry:
10397  %0 = bitcast <4 x i64> %__a to <16 x i16>
10398  %load = load <4 x i64>, ptr %__b
10399  %1 = bitcast <4 x i64> %load to <16 x i16>
10400  %2 = icmp sge <16 x i16> %0, %1
  ; Mask indices 16..31 select from the zeroinitializer operand, so the upper
  ; 16 result elements are zero.
10401  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10402  %4 = bitcast <32 x i1> %3 to i32
10403  ret i32 %4
10404}
10405
; Masked register variant: signed >= on <16 x i16> lanes, ANDed with the bits
; of the i16 argument %__u. The <16 x i1> result is zero-padded to 32 mask bits
; and returned as i32.
; The VLX checks expect the mask applied through {%k1} on vpcmpnltw; the NoVLX
; checks expect it applied afterwards with a scalar andl of %edi.
10406define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10407; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10408; VLX:       # %bb.0: # %entry
10409; VLX-NEXT:    kmovd %edi, %k1
10410; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10411; VLX-NEXT:    kmovd %k0, %eax
10412; VLX-NEXT:    vzeroupper
10413; VLX-NEXT:    retq
10414;
10415; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10416; NoVLX:       # %bb.0: # %entry
10417; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10418; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10419; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10420; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10421; NoVLX-NEXT:    kmovw %k0, %eax
10422; NoVLX-NEXT:    andl %edi, %eax
10423; NoVLX-NEXT:    vzeroupper
10424; NoVLX-NEXT:    retq
10425entry:
10426  %0 = bitcast <4 x i64> %__a to <16 x i16>
10427  %1 = bitcast <4 x i64> %__b to <16 x i16>
10428  %2 = icmp sge <16 x i16> %0, %1
  ; Reinterpret the i16 mask argument as one bit per lane and apply it.
10429  %3 = bitcast i16 %__u to <16 x i1>
10430  %4 = and <16 x i1> %2, %3
  ; Mask indices 16..31 select from the zeroinitializer operand, so the upper
  ; 16 result elements are zero.
10431  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10432  %6 = bitcast <32 x i1> %5 to i32
10433  ret i32 %6
10434}
10435
; Masked, memory-operand variant: signed >= on <16 x i16> lanes, with the right
; operand loaded from %__b and the result ANDed with the bits of %__u. The
; <16 x i1> result is zero-padded to 32 mask bits and returned as i32.
; The VLX checks expect a masked vpcmpnltw reading (%rsi); the NoVLX checks
; expect a vmovdqa load, the vpcmpgtw/vpternlogq sequence and a scalar andl.
10436define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
10437; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10438; VLX:       # %bb.0: # %entry
10439; VLX-NEXT:    kmovd %edi, %k1
10440; VLX-NEXT:    vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10441; VLX-NEXT:    kmovd %k0, %eax
10442; VLX-NEXT:    vzeroupper
10443; VLX-NEXT:    retq
10444;
10445; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10446; NoVLX:       # %bb.0: # %entry
10447; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10448; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10449; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10450; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10451; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10452; NoVLX-NEXT:    kmovw %k0, %eax
10453; NoVLX-NEXT:    andl %edi, %eax
10454; NoVLX-NEXT:    vzeroupper
10455; NoVLX-NEXT:    retq
10456entry:
10457  %0 = bitcast <4 x i64> %__a to <16 x i16>
10458  %load = load <4 x i64>, ptr %__b
10459  %1 = bitcast <4 x i64> %load to <16 x i16>
10460  %2 = icmp sge <16 x i16> %0, %1
  ; Reinterpret the i16 mask argument as one bit per lane and apply it.
10461  %3 = bitcast i16 %__u to <16 x i1>
10462  %4 = and <16 x i1> %2, %3
  ; Mask indices 16..31 select from the zeroinitializer operand, so the upper
  ; 16 result elements are zero.
10463  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10464  %6 = bitcast <32 x i1> %5 to i32
10465  ret i32 %6
10466}
10467
10468
; Unmasked register variant: signed >= on <16 x i16> lanes of two 256-bit
; arguments. The <16 x i1> result is zero-padded to 64 mask bits and returned
; as i64.
; The VLX checks expect vpcmpnltw followed by kmovq; the NoVLX checks expect
; the vpcmpgtw/vpternlogq/vpmovsxwd/vptestmd sequence and a kmovw into %eax.
10469define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10470; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10471; VLX:       # %bb.0: # %entry
10472; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0
10473; VLX-NEXT:    kmovq %k0, %rax
10474; VLX-NEXT:    vzeroupper
10475; VLX-NEXT:    retq
10476;
10477; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10478; NoVLX:       # %bb.0: # %entry
10479; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10480; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10481; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10482; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10483; NoVLX-NEXT:    kmovw %k0, %eax
10484; NoVLX-NEXT:    vzeroupper
10485; NoVLX-NEXT:    retq
10486entry:
10487  %0 = bitcast <4 x i64> %__a to <16 x i16>
10488  %1 = bitcast <4 x i64> %__b to <16 x i16>
10489  %2 = icmp sge <16 x i16> %0, %1
  ; Mask indices 16..31 select from the zeroinitializer operand, so result
  ; elements 16..63 are zero.
10490  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10491  %4 = bitcast <64 x i1> %3 to i64
10492  ret i64 %4
10493}
10494
; Unmasked memory-operand variant: signed >= on <16 x i16> lanes, with the
; right operand loaded from %__b. The <16 x i1> result is zero-padded to 64
; mask bits and returned as i64.
; The VLX checks expect vpcmpnltw reading (%rdi) directly; the NoVLX checks
; expect a separate vmovdqa load before the vpcmpgtw/vpternlogq sequence.
10495define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
10496; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10497; VLX:       # %bb.0: # %entry
10498; VLX-NEXT:    vpcmpnltw (%rdi), %ymm0, %k0
10499; VLX-NEXT:    kmovq %k0, %rax
10500; VLX-NEXT:    vzeroupper
10501; VLX-NEXT:    retq
10502;
10503; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10504; NoVLX:       # %bb.0: # %entry
10505; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10506; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10507; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10508; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10509; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10510; NoVLX-NEXT:    kmovw %k0, %eax
10511; NoVLX-NEXT:    vzeroupper
10512; NoVLX-NEXT:    retq
10513entry:
10514  %0 = bitcast <4 x i64> %__a to <16 x i16>
10515  %load = load <4 x i64>, ptr %__b
10516  %1 = bitcast <4 x i64> %load to <16 x i16>
10517  %2 = icmp sge <16 x i16> %0, %1
  ; Mask indices 16..31 select from the zeroinitializer operand, so result
  ; elements 16..63 are zero.
10518  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10519  %4 = bitcast <64 x i1> %3 to i64
10520  ret i64 %4
10521}
10522
; Masked register variant: signed >= on <16 x i16> lanes, ANDed with the bits
; of the i16 argument %__u. The <16 x i1> result is zero-padded to 64 mask bits
; and returned as i64.
; The VLX checks expect the mask applied through {%k1} on vpcmpnltw; the NoVLX
; checks expect it applied afterwards with a scalar andl of %edi.
10523define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10524; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10525; VLX:       # %bb.0: # %entry
10526; VLX-NEXT:    kmovd %edi, %k1
10527; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10528; VLX-NEXT:    kmovq %k0, %rax
10529; VLX-NEXT:    vzeroupper
10530; VLX-NEXT:    retq
10531;
10532; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10533; NoVLX:       # %bb.0: # %entry
10534; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10535; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10536; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10537; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10538; NoVLX-NEXT:    kmovw %k0, %eax
10539; NoVLX-NEXT:    andl %edi, %eax
10540; NoVLX-NEXT:    vzeroupper
10541; NoVLX-NEXT:    retq
10542entry:
10543  %0 = bitcast <4 x i64> %__a to <16 x i16>
10544  %1 = bitcast <4 x i64> %__b to <16 x i16>
10545  %2 = icmp sge <16 x i16> %0, %1
  ; Reinterpret the i16 mask argument as one bit per lane and apply it.
10546  %3 = bitcast i16 %__u to <16 x i1>
10547  %4 = and <16 x i1> %2, %3
  ; Mask indices 16..31 select from the zeroinitializer operand, so result
  ; elements 16..63 are zero.
10548  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10549  %6 = bitcast <64 x i1> %5 to i64
10550  ret i64 %6
10551}
10552
; Masked, memory-operand variant: signed >= on <16 x i16> lanes, with the right
; operand loaded from %__b and the result ANDed with the bits of %__u. The
; <16 x i1> result is zero-padded to 64 mask bits and returned as i64.
; The VLX checks expect a masked vpcmpnltw reading (%rsi); the NoVLX checks
; expect a vmovdqa load, the vpcmpgtw/vpternlogq sequence and a scalar andl.
10553define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
10554; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10555; VLX:       # %bb.0: # %entry
10556; VLX-NEXT:    kmovd %edi, %k1
10557; VLX-NEXT:    vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10558; VLX-NEXT:    kmovq %k0, %rax
10559; VLX-NEXT:    vzeroupper
10560; VLX-NEXT:    retq
10561;
10562; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10563; NoVLX:       # %bb.0: # %entry
10564; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10565; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10566; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10567; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10568; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10569; NoVLX-NEXT:    kmovw %k0, %eax
10570; NoVLX-NEXT:    andl %edi, %eax
10571; NoVLX-NEXT:    vzeroupper
10572; NoVLX-NEXT:    retq
10573entry:
10574  %0 = bitcast <4 x i64> %__a to <16 x i16>
10575  %load = load <4 x i64>, ptr %__b
10576  %1 = bitcast <4 x i64> %load to <16 x i16>
10577  %2 = icmp sge <16 x i16> %0, %1
  ; Reinterpret the i16 mask argument as one bit per lane and apply it.
10578  %3 = bitcast i16 %__u to <16 x i1>
10579  %4 = and <16 x i1> %2, %3
  ; Mask indices 16..31 select from the zeroinitializer operand, so result
  ; elements 16..63 are zero.
10580  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10581  %6 = bitcast <64 x i1> %5 to i64
10582  ret i64 %6
10583}
10584
10585
; Unmasked register variant: signed >= on <32 x i16> lanes of two 512-bit
; arguments. The <32 x i1> result is zero-padded to 64 mask bits and returned
; as i64.
; The VLX checks expect a single vpcmpnltw on zmm registers. The NoVLX checks
; expect the compare done per 256-bit half (the upper halves are obtained with
; vextracti64x4 $1) and the two 16-bit results merged with shll $16 / orl.
10586define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10587; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10588; VLX:       # %bb.0: # %entry
10589; VLX-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
10590; VLX-NEXT:    kmovq %k0, %rax
10591; VLX-NEXT:    vzeroupper
10592; VLX-NEXT:    retq
10593;
10594; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10595; NoVLX:       # %bb.0: # %entry
10596; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
10597; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
10598; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
10599; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
10600; NoVLX-NEXT:    kmovw %k0, %ecx
10601; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10602; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
10603; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10604; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10605; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10606; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10607; NoVLX-NEXT:    kmovw %k0, %eax
10608; NoVLX-NEXT:    shll $16, %eax
10609; NoVLX-NEXT:    orl %ecx, %eax
10610; NoVLX-NEXT:    vzeroupper
10611; NoVLX-NEXT:    retq
10612entry:
10613  %0 = bitcast <8 x i64> %__a to <32 x i16>
10614  %1 = bitcast <8 x i64> %__b to <32 x i16>
10615  %2 = icmp sge <32 x i16> %0, %1
  ; Mask indices 32..63 select from the zeroinitializer operand, so the upper
  ; 32 result elements are zero.
10616  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10617  %4 = bitcast <64 x i1> %3 to i64
10618  ret i64 %4
10619}
10620
; Unmasked memory-operand variant: signed >= on <32 x i16> lanes, with the
; right operand loaded from %__b. The <32 x i1> result is zero-padded to 64
; mask bits and returned as i64.
; The VLX checks expect vpcmpnltw reading (%rdi) directly. The NoVLX checks
; expect two 256-bit loads, at (%rdi) and 32(%rdi), a compare per half and the
; two 16-bit results merged with shll $16 / orl.
10621define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
10622; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10623; VLX:       # %bb.0: # %entry
10624; VLX-NEXT:    vpcmpnltw (%rdi), %zmm0, %k0
10625; VLX-NEXT:    kmovq %k0, %rax
10626; VLX-NEXT:    vzeroupper
10627; VLX-NEXT:    retq
10628;
10629; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10630; NoVLX:       # %bb.0: # %entry
10631; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10632; NoVLX-NEXT:    vmovdqa 32(%rdi), %ymm2
10633; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm1
10634; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
10635; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
10636; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10637; NoVLX-NEXT:    kmovw %k0, %ecx
10638; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10639; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
10640; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10641; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10642; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10643; NoVLX-NEXT:    kmovw %k0, %eax
10644; NoVLX-NEXT:    shll $16, %eax
10645; NoVLX-NEXT:    orl %ecx, %eax
10646; NoVLX-NEXT:    vzeroupper
10647; NoVLX-NEXT:    retq
10648entry:
10649  %0 = bitcast <8 x i64> %__a to <32 x i16>
10650  %load = load <8 x i64>, ptr %__b
10651  %1 = bitcast <8 x i64> %load to <32 x i16>
10652  %2 = icmp sge <32 x i16> %0, %1
  ; Mask indices 32..63 select from the zeroinitializer operand, so the upper
  ; 32 result elements are zero.
10653  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10654  %4 = bitcast <64 x i1> %3 to i64
10655  ret i64 %4
10656}
10657
; Masked register variant: signed >= on <32 x i16> lanes, ANDed with the bits
; of the i32 argument %__u. The <32 x i1> result is zero-padded to 64 mask bits
; and returned as i64.
; The VLX checks expect the mask applied through {%k1} on vpcmpnltw. The NoVLX
; checks expect a compare per 256-bit half: %edi is ANDed into the low result,
; shifted right by 16 and ANDed into the high result, and the halves are merged
; with shll $16 / movzwl / orl.
10658define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10659; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10660; VLX:       # %bb.0: # %entry
10661; VLX-NEXT:    kmovd %edi, %k1
10662; VLX-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10663; VLX-NEXT:    kmovq %k0, %rax
10664; VLX-NEXT:    vzeroupper
10665; VLX-NEXT:    retq
10666;
10667; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10668; NoVLX:       # %bb.0: # %entry
10669; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
10670; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
10671; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
10672; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
10673; NoVLX-NEXT:    kmovw %k0, %eax
10674; NoVLX-NEXT:    andl %edi, %eax
10675; NoVLX-NEXT:    shrl $16, %edi
10676; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10677; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
10678; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10679; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10680; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10681; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10682; NoVLX-NEXT:    kmovw %k0, %ecx
10683; NoVLX-NEXT:    andl %edi, %ecx
10684; NoVLX-NEXT:    shll $16, %ecx
10685; NoVLX-NEXT:    movzwl %ax, %eax
10686; NoVLX-NEXT:    orl %ecx, %eax
10687; NoVLX-NEXT:    vzeroupper
10688; NoVLX-NEXT:    retq
10689entry:
10690  %0 = bitcast <8 x i64> %__a to <32 x i16>
10691  %1 = bitcast <8 x i64> %__b to <32 x i16>
10692  %2 = icmp sge <32 x i16> %0, %1
  ; Reinterpret the i32 mask argument as one bit per lane and apply it.
10693  %3 = bitcast i32 %__u to <32 x i1>
10694  %4 = and <32 x i1> %2, %3
  ; Mask indices 32..63 select from the zeroinitializer operand, so the upper
  ; 32 result elements are zero.
10695  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10696  %6 = bitcast <64 x i1> %5 to i64
10697  ret i64 %6
10698}
10699
; Masked, memory-operand variant: signed >= on <32 x i16> lanes, with the right
; operand loaded from %__b and the result ANDed with the bits of %__u. The
; <32 x i1> result is zero-padded to 64 mask bits and returned as i64.
; The VLX checks expect a masked vpcmpnltw reading (%rsi). The NoVLX checks
; expect 256-bit loads from (%rsi) and 32(%rsi), a compare per half with %edi
; (then %edi >> 16) ANDed in, and the halves merged with shll $16 / movzwl /
; orl.
10700define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
10701; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10702; VLX:       # %bb.0: # %entry
10703; VLX-NEXT:    kmovd %edi, %k1
10704; VLX-NEXT:    vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10705; VLX-NEXT:    kmovq %k0, %rax
10706; VLX-NEXT:    vzeroupper
10707; VLX-NEXT:    retq
10708;
10709; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10710; NoVLX:       # %bb.0: # %entry
10711; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10712; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm1
10713; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
10714; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
10715; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10716; NoVLX-NEXT:    kmovw %k0, %eax
10717; NoVLX-NEXT:    andl %edi, %eax
10718; NoVLX-NEXT:    shrl $16, %edi
10719; NoVLX-NEXT:    vmovdqa 32(%rsi), %ymm1
10720; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10721; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10722; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10723; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10724; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10725; NoVLX-NEXT:    kmovw %k0, %ecx
10726; NoVLX-NEXT:    andl %edi, %ecx
10727; NoVLX-NEXT:    shll $16, %ecx
10728; NoVLX-NEXT:    movzwl %ax, %eax
10729; NoVLX-NEXT:    orl %ecx, %eax
10730; NoVLX-NEXT:    vzeroupper
10731; NoVLX-NEXT:    retq
10732entry:
10733  %0 = bitcast <8 x i64> %__a to <32 x i16>
10734  %load = load <8 x i64>, ptr %__b
10735  %1 = bitcast <8 x i64> %load to <32 x i16>
10736  %2 = icmp sge <32 x i16> %0, %1
  ; Reinterpret the i32 mask argument as one bit per lane and apply it.
10737  %3 = bitcast i32 %__u to <32 x i1>
10738  %4 = and <32 x i1> %2, %3
  ; Mask indices 32..63 select from the zeroinitializer operand, so the upper
  ; 32 result elements are zero.
10739  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10740  %6 = bitcast <64 x i1> %5 to i64
10741  ret i64 %6
10742}
10743
10744
; Unmasked register variant: signed >= on <4 x i32> lanes of two 128-bit
; arguments. The <4 x i1> result is zero-padded to 8 mask bits and returned
; as i8.
; The VLX checks expect vpcmpnltd on xmm registers. The NoVLX checks expect
; vpcmpnltd on zmm registers followed by kshiftlw $12 / kshiftrw $12 on the
; resulting mask.
10745define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10746; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10747; VLX:       # %bb.0: # %entry
10748; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
10749; VLX-NEXT:    kmovd %k0, %eax
10750; VLX-NEXT:    # kill: def $al killed $al killed $eax
10751; VLX-NEXT:    retq
10752;
10753; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10754; NoVLX:       # %bb.0: # %entry
10755; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
10756; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10757; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10758; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10759; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10760; NoVLX-NEXT:    kmovw %k0, %eax
10761; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10762; NoVLX-NEXT:    vzeroupper
10763; NoVLX-NEXT:    retq
10764entry:
10765  %0 = bitcast <2 x i64> %__a to <4 x i32>
10766  %1 = bitcast <2 x i64> %__b to <4 x i32>
10767  %2 = icmp sge <4 x i32> %0, %1
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10768  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10769  %4 = bitcast <8 x i1> %3 to i8
10770  ret i8 %4
10771}
10772
; Unmasked memory-operand variant: signed >= on <4 x i32> lanes, with the right
; operand loaded from %__b. The <4 x i1> result is zero-padded to 8 mask bits
; and returned as i8.
; The VLX checks expect vpcmpnltd reading (%rdi) directly. The NoVLX checks
; expect a vmovdqa load into xmm1, vpcmpnltd on zmm registers and
; kshiftlw $12 / kshiftrw $12 on the resulting mask.
10773define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10774; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10775; VLX:       # %bb.0: # %entry
10776; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
10777; VLX-NEXT:    kmovd %k0, %eax
10778; VLX-NEXT:    # kill: def $al killed $al killed $eax
10779; VLX-NEXT:    retq
10780;
10781; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10782; NoVLX:       # %bb.0: # %entry
10783; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10784; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10785; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10786; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10787; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10788; NoVLX-NEXT:    kmovw %k0, %eax
10789; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10790; NoVLX-NEXT:    vzeroupper
10791; NoVLX-NEXT:    retq
10792entry:
10793  %0 = bitcast <2 x i64> %__a to <4 x i32>
10794  %load = load <2 x i64>, ptr %__b
10795  %1 = bitcast <2 x i64> %load to <4 x i32>
10796  %2 = icmp sge <4 x i32> %0, %1
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10797  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10798  %4 = bitcast <8 x i1> %3 to i8
10799  ret i8 %4
10800}
10801
; Masked register variant: signed >= on <4 x i32> lanes, ANDed with the low
; four bits of the i8 argument %__u. The <4 x i1> result is zero-padded to 8
; mask bits and returned as i8.
; The VLX checks expect a masked vpcmpnltd on xmm registers. The NoVLX checks
; expect a masked vpcmpnltd on zmm registers followed by
; kshiftlw $12 / kshiftrw $12 on the resulting mask.
10802define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10803; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10804; VLX:       # %bb.0: # %entry
10805; VLX-NEXT:    kmovd %edi, %k1
10806; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10807; VLX-NEXT:    kmovd %k0, %eax
10808; VLX-NEXT:    # kill: def $al killed $al killed $eax
10809; VLX-NEXT:    retq
10810;
10811; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10812; NoVLX:       # %bb.0: # %entry
10813; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
10814; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10815; NoVLX-NEXT:    kmovw %edi, %k1
10816; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10817; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10818; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10819; NoVLX-NEXT:    kmovw %k0, %eax
10820; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10821; NoVLX-NEXT:    vzeroupper
10822; NoVLX-NEXT:    retq
10823entry:
10824  %0 = bitcast <2 x i64> %__a to <4 x i32>
10825  %1 = bitcast <2 x i64> %__b to <4 x i32>
10826  %2 = icmp sge <4 x i32> %0, %1
10827  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep mask elements 0..3 only; the undef second operand is never selected.
  ; NOTE(review): LangRef prefers poison over undef; switching would need the
  ; checks regenerated - confirm before changing.
10828  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10829  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10830  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10831  %6 = bitcast <8 x i1> %5 to i8
10832  ret i8 %6
10833}
10834
; Masked, memory-operand variant: signed >= on <4 x i32> lanes, with the right
; operand loaded from %__b and the result ANDed with the low four bits of
; %__u. The <4 x i1> result is zero-padded to 8 mask bits and returned as i8.
; The VLX checks expect a masked vpcmpnltd reading (%rsi). The NoVLX checks
; expect a vmovdqa load, a masked vpcmpnltd on zmm registers and
; kshiftlw $12 / kshiftrw $12 on the resulting mask.
10835define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10836; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10837; VLX:       # %bb.0: # %entry
10838; VLX-NEXT:    kmovd %edi, %k1
10839; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
10840; VLX-NEXT:    kmovd %k0, %eax
10841; VLX-NEXT:    # kill: def $al killed $al killed $eax
10842; VLX-NEXT:    retq
10843;
10844; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10845; NoVLX:       # %bb.0: # %entry
10846; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10847; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10848; NoVLX-NEXT:    kmovw %edi, %k1
10849; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10850; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10851; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10852; NoVLX-NEXT:    kmovw %k0, %eax
10853; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10854; NoVLX-NEXT:    vzeroupper
10855; NoVLX-NEXT:    retq
10856entry:
10857  %0 = bitcast <2 x i64> %__a to <4 x i32>
10858  %load = load <2 x i64>, ptr %__b
10859  %1 = bitcast <2 x i64> %load to <4 x i32>
10860  %2 = icmp sge <4 x i32> %0, %1
10861  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep mask elements 0..3 only; the undef second operand is never selected.
  ; NOTE(review): LangRef prefers poison over undef; switching would need the
  ; checks regenerated - confirm before changing.
10862  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10863  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10864  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10865  %6 = bitcast <8 x i1> %5 to i8
10866  ret i8 %6
10867}
10868
10869
; Unmasked broadcast variant: a single i32 is loaded from %__b, splatted to
; all four lanes and used as the right operand of a signed >= compare on
; <4 x i32>. The <4 x i1> result is zero-padded to 8 mask bits and returned
; as i8.
; The VLX checks expect vpcmpnltd with a (%rdi){1to4} broadcast operand; the
; NoVLX checks expect (%rdi){1to16} on zmm followed by
; kshiftlw $12 / kshiftrw $12 on the resulting mask.
10870define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10871; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10872; VLX:       # %bb.0: # %entry
10873; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
10874; VLX-NEXT:    kmovd %k0, %eax
10875; VLX-NEXT:    # kill: def $al killed $al killed $eax
10876; VLX-NEXT:    retq
10877;
10878; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10879; NoVLX:       # %bb.0: # %entry
10880; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10881; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
10882; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10883; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10884; NoVLX-NEXT:    kmovw %k0, %eax
10885; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10886; NoVLX-NEXT:    vzeroupper
10887; NoVLX-NEXT:    retq
10888entry:
10889  %0 = bitcast <2 x i64> %__a to <4 x i32>
10890  %load = load i32, ptr %__b
  ; Splat: place the scalar in element 0, then select element 0 for every lane.
10891  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10892  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10893  %2 = icmp sge <4 x i32> %0, %1
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10894  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10895  %4 = bitcast <8 x i1> %3 to i8
10896  ret i8 %4
10897}
10898
; Masked broadcast variant: a single i32 is loaded from %__b, splatted to all
; four lanes and used as the right operand of a signed >= compare on
; <4 x i32>; the result is ANDed with the low four bits of %__u, zero-padded
; to 8 mask bits and returned as i8.
; The VLX checks expect a masked vpcmpnltd with a (%rsi){1to4} broadcast
; operand; the NoVLX checks expect (%rsi){1to16} on zmm followed by
; kshiftlw $12 / kshiftrw $12 on the resulting mask.
10899define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10900; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10901; VLX:       # %bb.0: # %entry
10902; VLX-NEXT:    kmovd %edi, %k1
10903; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
10904; VLX-NEXT:    kmovd %k0, %eax
10905; VLX-NEXT:    # kill: def $al killed $al killed $eax
10906; VLX-NEXT:    retq
10907;
10908; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10909; NoVLX:       # %bb.0: # %entry
10910; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10911; NoVLX-NEXT:    kmovw %edi, %k1
10912; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
10913; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10914; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10915; NoVLX-NEXT:    kmovw %k0, %eax
10916; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10917; NoVLX-NEXT:    vzeroupper
10918; NoVLX-NEXT:    retq
10919entry:
10920  %0 = bitcast <2 x i64> %__a to <4 x i32>
10921  %load = load i32, ptr %__b
  ; Splat: place the scalar in element 0, then select element 0 for every lane.
10922  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10923  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10924  %2 = icmp sge <4 x i32> %0, %1
10925  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep mask elements 0..3 only; the undef second operand is never selected.
10926  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Note the operand order here is mask first, compare result second.
10927  %4 = and <4 x i1> %extract.i, %2
  ; Mask indices 4..7 select from the zeroinitializer operand, so the upper
  ; 4 result elements are zero.
10928  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10929  %6 = bitcast <8 x i1> %5 to i8
10930  ret i8 %6
10931}
10932
10933
; Unmasked register variant: signed >= on <4 x i32> lanes of two 128-bit
; arguments. The <4 x i1> result is zero-padded to 16 mask bits and returned
; as i16.
; The VLX checks expect vpcmpnltd on xmm registers. The NoVLX checks expect
; vpcmpnltd on zmm registers followed by kshiftlw $12 / kshiftrw $12 on the
; resulting mask.
10934define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10935; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10936; VLX:       # %bb.0: # %entry
10937; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
10938; VLX-NEXT:    kmovd %k0, %eax
10939; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10940; VLX-NEXT:    retq
10941;
10942; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10943; NoVLX:       # %bb.0: # %entry
10944; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
10945; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10946; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10947; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10948; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10949; NoVLX-NEXT:    kmovw %k0, %eax
10950; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10951; NoVLX-NEXT:    vzeroupper
10952; NoVLX-NEXT:    retq
10953entry:
10954  %0 = bitcast <2 x i64> %__a to <4 x i32>
10955  %1 = bitcast <2 x i64> %__b to <4 x i32>
10956  %2 = icmp sge <4 x i32> %0, %1
  ; Mask indices 4..7 select from the zeroinitializer operand, so result
  ; elements 4..15 are zero.
10957  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10958  %4 = bitcast <16 x i1> %3 to i16
10959  ret i16 %4
10960}
10961
; Unmasked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64> and compared (signed >=) against %__a as <4 x i32>; the result is
; widened to 16 lanes and returned as i16.
; The assertions expect the load folded into vpcmpnltd in the +avx512vl run,
; and a separate vmovdqa load in the +avx512f-only run.
10962define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10963; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10964; VLX:       # %bb.0: # %entry
10965; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
10966; VLX-NEXT:    kmovd %k0, %eax
10967; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10968; VLX-NEXT:    retq
10969;
10970; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10971; NoVLX:       # %bb.0: # %entry
10972; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10973; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10974; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10975; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10976; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10977; NoVLX-NEXT:    kmovw %k0, %eax
10978; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10979; NoVLX-NEXT:    vzeroupper
10980; NoVLX-NEXT:    retq
10981entry:
10982  %0 = bitcast <2 x i64> %__a to <4 x i32>
10983  %load = load <2 x i64>, ptr %__b
10984  %1 = bitcast <2 x i64> %load to <4 x i32>
10985  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
10986  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10987  %4 = bitcast <16 x i1> %3 to i16
10988  ret i16 %4
10989}
10990
; Masked register/register case: the low four bits of %__u are ANDed with the
; signed >= compare of %__a and %__b (as <4 x i32>); the result is widened to
; 16 lanes and returned as i16.
; The assertions expect %edi to be moved into %k1 and used as the write-mask
; of vpcmpnltd in both runs.
10991define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10992; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
10993; VLX:       # %bb.0: # %entry
10994; VLX-NEXT:    kmovd %edi, %k1
10995; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10996; VLX-NEXT:    kmovd %k0, %eax
10997; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10998; VLX-NEXT:    retq
10999;
11000; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11001; NoVLX:       # %bb.0: # %entry
11002; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11003; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11004; NoVLX-NEXT:    kmovw %edi, %k1
11005; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11006; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11007; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11008; NoVLX-NEXT:    kmovw %k0, %eax
11009; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11010; NoVLX-NEXT:    vzeroupper
11011; NoVLX-NEXT:    retq
11012entry:
11013  %0 = bitcast <2 x i64> %__a to <4 x i32>
11014  %1 = bitcast <2 x i64> %__b to <4 x i32>
11015  %2 = icmp sge <4 x i32> %0, %1
11016  %3 = bitcast i8 %__u to <8 x i1>
11017  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11018  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11019  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11020  %6 = bitcast <16 x i1> %5 to i16
11021  ret i16 %6
11022}
11023
; Masked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64>; the low four bits of %__u are ANDed with the signed >= compare
; (as <4 x i32>), and the result is widened to 16 lanes and returned as i16.
; The assertions expect the load folded into the masked vpcmpnltd in the
; +avx512vl run, and a separate vmovdqa load in the +avx512f-only run.
11024define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11025; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11026; VLX:       # %bb.0: # %entry
11027; VLX-NEXT:    kmovd %edi, %k1
11028; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11029; VLX-NEXT:    kmovd %k0, %eax
11030; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11031; VLX-NEXT:    retq
11032;
11033; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11034; NoVLX:       # %bb.0: # %entry
11035; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11036; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11037; NoVLX-NEXT:    kmovw %edi, %k1
11038; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11039; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11040; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11041; NoVLX-NEXT:    kmovw %k0, %eax
11042; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11043; NoVLX-NEXT:    vzeroupper
11044; NoVLX-NEXT:    retq
11045entry:
11046  %0 = bitcast <2 x i64> %__a to <4 x i32>
11047  %load = load <2 x i64>, ptr %__b
11048  %1 = bitcast <2 x i64> %load to <4 x i32>
11049  %2 = icmp sge <4 x i32> %0, %1
11050  %3 = bitcast i8 %__u to <8 x i1>
11051  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11052  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11053  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11054  %6 = bitcast <16 x i1> %5 to i16
11055  ret i16 %6
11056}
11057
11058
; Unmasked broadcast case: one i32 is loaded from %__b and splatted to all
; four lanes, then %__a (as <4 x i32>) is compared signed >= against it; the
; result is widened to 16 lanes and returned as i16.
; The assertions expect an embedded-broadcast memory operand on vpcmpnltd:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11059define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11060; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11061; VLX:       # %bb.0: # %entry
11062; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11063; VLX-NEXT:    kmovd %k0, %eax
11064; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11065; VLX-NEXT:    retq
11066;
11067; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11068; NoVLX:       # %bb.0: # %entry
11069; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11070; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11071; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11072; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11073; NoVLX-NEXT:    kmovw %k0, %eax
11074; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11075; NoVLX-NEXT:    vzeroupper
11076; NoVLX-NEXT:    retq
11077entry:
11078  %0 = bitcast <2 x i64> %__a to <4 x i32>
11079  %load = load i32, ptr %__b
11080  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11081  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11082  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11083  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11084  %4 = bitcast <16 x i1> %3 to i16
11085  ret i16 %4
11086}
11087
; Masked broadcast case: one i32 is loaded from %__b and splatted to all four
; lanes; the low four bits of %__u are ANDed with the signed >= compare of
; %__a (as <4 x i32>) against the splat, and the result is widened to 16 lanes
; and returned as i16.
; The assertions expect a masked vpcmpnltd with an embedded-broadcast operand:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11088define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11089; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11090; VLX:       # %bb.0: # %entry
11091; VLX-NEXT:    kmovd %edi, %k1
11092; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11093; VLX-NEXT:    kmovd %k0, %eax
11094; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11095; VLX-NEXT:    retq
11096;
11097; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11098; NoVLX:       # %bb.0: # %entry
11099; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11100; NoVLX-NEXT:    kmovw %edi, %k1
11101; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11102; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11103; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11104; NoVLX-NEXT:    kmovw %k0, %eax
11105; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11106; NoVLX-NEXT:    vzeroupper
11107; NoVLX-NEXT:    retq
11108entry:
11109  %0 = bitcast <2 x i64> %__a to <4 x i32>
11110  %load = load i32, ptr %__b
11111  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11112  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11113  %2 = icmp sge <4 x i32> %0, %1
11114  %3 = bitcast i8 %__u to <8 x i1>
11115  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11116  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11117  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11118  %6 = bitcast <16 x i1> %5 to i16
11119  ret i16 %6
11120}
11121
11122
; Unmasked register/register case: signed >= compare of %__a and %__b viewed
; as <4 x i32>; the <4 x i1> result is widened to 32 lanes and returned as i32.
; The assertions expect a 128-bit vpcmpnltd in the +avx512vl run, and a 512-bit
; vpcmpnltd followed by a kshiftlw/kshiftrw $12 pair in the +avx512f-only run.
11123define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11124; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11125; VLX:       # %bb.0: # %entry
11126; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
11127; VLX-NEXT:    kmovd %k0, %eax
11128; VLX-NEXT:    retq
11129;
11130; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11131; NoVLX:       # %bb.0: # %entry
11132; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11133; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11134; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11135; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11136; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11137; NoVLX-NEXT:    kmovw %k0, %eax
11138; NoVLX-NEXT:    vzeroupper
11139; NoVLX-NEXT:    retq
11140entry:
11141  %0 = bitcast <2 x i64> %__a to <4 x i32>
11142  %1 = bitcast <2 x i64> %__b to <4 x i32>
11143  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11144  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11145  %4 = bitcast <32 x i1> %3 to i32
11146  ret i32 %4
11147}
11148
; Unmasked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64> and compared (signed >=) against %__a as <4 x i32>; the result is
; widened to 32 lanes and returned as i32.
; The assertions expect the load folded into vpcmpnltd in the +avx512vl run,
; and a separate vmovdqa load in the +avx512f-only run.
11149define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11150; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11151; VLX:       # %bb.0: # %entry
11152; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
11153; VLX-NEXT:    kmovd %k0, %eax
11154; VLX-NEXT:    retq
11155;
11156; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11157; NoVLX:       # %bb.0: # %entry
11158; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11159; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
11160; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11161; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11162; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11163; NoVLX-NEXT:    kmovw %k0, %eax
11164; NoVLX-NEXT:    vzeroupper
11165; NoVLX-NEXT:    retq
11166entry:
11167  %0 = bitcast <2 x i64> %__a to <4 x i32>
11168  %load = load <2 x i64>, ptr %__b
11169  %1 = bitcast <2 x i64> %load to <4 x i32>
11170  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11171  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11172  %4 = bitcast <32 x i1> %3 to i32
11173  ret i32 %4
11174}
11175
; Masked register/register case: the low four bits of %__u are ANDed with the
; signed >= compare of %__a and %__b (as <4 x i32>); the result is widened to
; 32 lanes and returned as i32.
; The assertions expect %edi to be moved into %k1 and used as the write-mask
; of vpcmpnltd in both runs.
11176define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11177; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11178; VLX:       # %bb.0: # %entry
11179; VLX-NEXT:    kmovd %edi, %k1
11180; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11181; VLX-NEXT:    kmovd %k0, %eax
11182; VLX-NEXT:    retq
11183;
11184; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11185; NoVLX:       # %bb.0: # %entry
11186; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11187; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11188; NoVLX-NEXT:    kmovw %edi, %k1
11189; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11190; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11191; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11192; NoVLX-NEXT:    kmovw %k0, %eax
11193; NoVLX-NEXT:    vzeroupper
11194; NoVLX-NEXT:    retq
11195entry:
11196  %0 = bitcast <2 x i64> %__a to <4 x i32>
11197  %1 = bitcast <2 x i64> %__b to <4 x i32>
11198  %2 = icmp sge <4 x i32> %0, %1
11199  %3 = bitcast i8 %__u to <8 x i1>
11200  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11201  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11202  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11203  %6 = bitcast <32 x i1> %5 to i32
11204  ret i32 %6
11205}
11206
; Masked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64>; the low four bits of %__u are ANDed with the signed >= compare
; (as <4 x i32>), and the result is widened to 32 lanes and returned as i32.
; The assertions expect the load folded into the masked vpcmpnltd in the
; +avx512vl run, and a separate vmovdqa load in the +avx512f-only run.
11207define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11208; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11209; VLX:       # %bb.0: # %entry
11210; VLX-NEXT:    kmovd %edi, %k1
11211; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11212; VLX-NEXT:    kmovd %k0, %eax
11213; VLX-NEXT:    retq
11214;
11215; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11216; NoVLX:       # %bb.0: # %entry
11217; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11218; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11219; NoVLX-NEXT:    kmovw %edi, %k1
11220; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11221; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11222; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11223; NoVLX-NEXT:    kmovw %k0, %eax
11224; NoVLX-NEXT:    vzeroupper
11225; NoVLX-NEXT:    retq
11226entry:
11227  %0 = bitcast <2 x i64> %__a to <4 x i32>
11228  %load = load <2 x i64>, ptr %__b
11229  %1 = bitcast <2 x i64> %load to <4 x i32>
11230  %2 = icmp sge <4 x i32> %0, %1
11231  %3 = bitcast i8 %__u to <8 x i1>
11232  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11233  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11234  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11235  %6 = bitcast <32 x i1> %5 to i32
11236  ret i32 %6
11237}
11238
11239
; Unmasked broadcast case: one i32 is loaded from %__b and splatted to all
; four lanes, then %__a (as <4 x i32>) is compared signed >= against it; the
; result is widened to 32 lanes and returned as i32.
; The assertions expect an embedded-broadcast memory operand on vpcmpnltd:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11240define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11241; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11242; VLX:       # %bb.0: # %entry
11243; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11244; VLX-NEXT:    kmovd %k0, %eax
11245; VLX-NEXT:    retq
11246;
11247; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11248; NoVLX:       # %bb.0: # %entry
11249; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11250; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11251; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11252; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11253; NoVLX-NEXT:    kmovw %k0, %eax
11254; NoVLX-NEXT:    vzeroupper
11255; NoVLX-NEXT:    retq
11256entry:
11257  %0 = bitcast <2 x i64> %__a to <4 x i32>
11258  %load = load i32, ptr %__b
11259  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11260  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11261  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11262  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11263  %4 = bitcast <32 x i1> %3 to i32
11264  ret i32 %4
11265}
11266
; Masked broadcast case: one i32 is loaded from %__b and splatted to all four
; lanes; the low four bits of %__u are ANDed with the signed >= compare of
; %__a (as <4 x i32>) against the splat, and the result is widened to 32 lanes
; and returned as i32.
; The assertions expect a masked vpcmpnltd with an embedded-broadcast operand:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11267define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11268; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11269; VLX:       # %bb.0: # %entry
11270; VLX-NEXT:    kmovd %edi, %k1
11271; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11272; VLX-NEXT:    kmovd %k0, %eax
11273; VLX-NEXT:    retq
11274;
11275; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11276; NoVLX:       # %bb.0: # %entry
11277; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11278; NoVLX-NEXT:    kmovw %edi, %k1
11279; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11280; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11281; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11282; NoVLX-NEXT:    kmovw %k0, %eax
11283; NoVLX-NEXT:    vzeroupper
11284; NoVLX-NEXT:    retq
11285entry:
11286  %0 = bitcast <2 x i64> %__a to <4 x i32>
11287  %load = load i32, ptr %__b
11288  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11289  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11290  %2 = icmp sge <4 x i32> %0, %1
11291  %3 = bitcast i8 %__u to <8 x i1>
11292  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11293  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11294  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11295  %6 = bitcast <32 x i1> %5 to i32
11296  ret i32 %6
11297}
11298
11299
; Unmasked register/register case: signed >= compare of %__a and %__b viewed
; as <4 x i32>; the <4 x i1> result is widened to 64 lanes and returned as i64.
; The assertions expect a 128-bit vpcmpnltd and a kmovq in the +avx512vl run,
; and a 512-bit vpcmpnltd followed by a kshiftlw/kshiftrw $12 pair and a kmovw
; in the +avx512f-only run.
11300define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11301; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11302; VLX:       # %bb.0: # %entry
11303; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
11304; VLX-NEXT:    kmovq %k0, %rax
11305; VLX-NEXT:    retq
11306;
11307; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11308; NoVLX:       # %bb.0: # %entry
11309; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11310; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11311; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11312; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11313; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11314; NoVLX-NEXT:    kmovw %k0, %eax
11315; NoVLX-NEXT:    vzeroupper
11316; NoVLX-NEXT:    retq
11317entry:
11318  %0 = bitcast <2 x i64> %__a to <4 x i32>
11319  %1 = bitcast <2 x i64> %__b to <4 x i32>
11320  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11321  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11322  %4 = bitcast <64 x i1> %3 to i64
11323  ret i64 %4
11324}
11325
; Unmasked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64> and compared (signed >=) against %__a as <4 x i32>; the result is
; widened to 64 lanes and returned as i64.
; The assertions expect the load folded into vpcmpnltd in the +avx512vl run,
; and a separate vmovdqa load in the +avx512f-only run.
11326define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11327; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11328; VLX:       # %bb.0: # %entry
11329; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
11330; VLX-NEXT:    kmovq %k0, %rax
11331; VLX-NEXT:    retq
11332;
11333; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11334; NoVLX:       # %bb.0: # %entry
11335; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11336; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
11337; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11338; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11339; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11340; NoVLX-NEXT:    kmovw %k0, %eax
11341; NoVLX-NEXT:    vzeroupper
11342; NoVLX-NEXT:    retq
11343entry:
11344  %0 = bitcast <2 x i64> %__a to <4 x i32>
11345  %load = load <2 x i64>, ptr %__b
11346  %1 = bitcast <2 x i64> %load to <4 x i32>
11347  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11348  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11349  %4 = bitcast <64 x i1> %3 to i64
11350  ret i64 %4
11351}
11352
; Masked register/register case: the low four bits of %__u are ANDed with the
; signed >= compare of %__a and %__b (as <4 x i32>); the result is widened to
; 64 lanes and returned as i64.
; The assertions expect %edi to be moved into %k1 and used as the write-mask
; of vpcmpnltd in both runs.
11353define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11354; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11355; VLX:       # %bb.0: # %entry
11356; VLX-NEXT:    kmovd %edi, %k1
11357; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11358; VLX-NEXT:    kmovq %k0, %rax
11359; VLX-NEXT:    retq
11360;
11361; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11362; NoVLX:       # %bb.0: # %entry
11363; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11364; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11365; NoVLX-NEXT:    kmovw %edi, %k1
11366; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11367; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11368; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11369; NoVLX-NEXT:    kmovw %k0, %eax
11370; NoVLX-NEXT:    vzeroupper
11371; NoVLX-NEXT:    retq
11372entry:
11373  %0 = bitcast <2 x i64> %__a to <4 x i32>
11374  %1 = bitcast <2 x i64> %__b to <4 x i32>
11375  %2 = icmp sge <4 x i32> %0, %1
11376  %3 = bitcast i8 %__u to <8 x i1>
11377  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11378  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11379  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11380  %6 = bitcast <64 x i1> %5 to i64
11381  ret i64 %6
11382}
11383
; Masked memory-operand case: the right-hand side is loaded from %__b as
; <2 x i64>; the low four bits of %__u are ANDed with the signed >= compare
; (as <4 x i32>), and the result is widened to 64 lanes and returned as i64.
; The assertions expect the load folded into the masked vpcmpnltd in the
; +avx512vl run, and a separate vmovdqa load in the +avx512f-only run.
11384define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11385; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11386; VLX:       # %bb.0: # %entry
11387; VLX-NEXT:    kmovd %edi, %k1
11388; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11389; VLX-NEXT:    kmovq %k0, %rax
11390; VLX-NEXT:    retq
11391;
11392; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11393; NoVLX:       # %bb.0: # %entry
11394; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11395; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11396; NoVLX-NEXT:    kmovw %edi, %k1
11397; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11398; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11399; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11400; NoVLX-NEXT:    kmovw %k0, %eax
11401; NoVLX-NEXT:    vzeroupper
11402; NoVLX-NEXT:    retq
11403entry:
11404  %0 = bitcast <2 x i64> %__a to <4 x i32>
11405  %load = load <2 x i64>, ptr %__b
11406  %1 = bitcast <2 x i64> %load to <4 x i32>
11407  %2 = icmp sge <4 x i32> %0, %1
11408  %3 = bitcast i8 %__u to <8 x i1>
11409  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11410  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11411  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11412  %6 = bitcast <64 x i1> %5 to i64
11413  ret i64 %6
11414}
11415
11416
; Unmasked broadcast case: one i32 is loaded from %__b and splatted to all
; four lanes, then %__a (as <4 x i32>) is compared signed >= against it; the
; result is widened to 64 lanes and returned as i64.
; The assertions expect an embedded-broadcast memory operand on vpcmpnltd:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11417define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11418; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11419; VLX:       # %bb.0: # %entry
11420; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11421; VLX-NEXT:    kmovq %k0, %rax
11422; VLX-NEXT:    retq
11423;
11424; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11425; NoVLX:       # %bb.0: # %entry
11426; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11427; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11428; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11429; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11430; NoVLX-NEXT:    kmovw %k0, %eax
11431; NoVLX-NEXT:    vzeroupper
11432; NoVLX-NEXT:    retq
11433entry:
11434  %0 = bitcast <2 x i64> %__a to <4 x i32>
11435  %load = load i32, ptr %__b
11436  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11437  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11438  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11439  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11440  %4 = bitcast <64 x i1> %3 to i64
11441  ret i64 %4
11442}
11443
; Masked broadcast case: one i32 is loaded from %__b and splatted to all four
; lanes; the low four bits of %__u are ANDed with the signed >= compare of
; %__a (as <4 x i32>) against the splat, and the result is widened to 64 lanes
; and returned as i64.
; The assertions expect a masked vpcmpnltd with an embedded-broadcast operand:
; {1to4} in the +avx512vl run and {1to16} in the +avx512f-only run.
11444define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11445; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11446; VLX:       # %bb.0: # %entry
11447; VLX-NEXT:    kmovd %edi, %k1
11448; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11449; VLX-NEXT:    kmovq %k0, %rax
11450; VLX-NEXT:    retq
11451;
11452; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11453; NoVLX:       # %bb.0: # %entry
11454; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11455; NoVLX-NEXT:    kmovw %edi, %k1
11456; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11457; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11458; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11459; NoVLX-NEXT:    kmovw %k0, %eax
11460; NoVLX-NEXT:    vzeroupper
11461; NoVLX-NEXT:    retq
11462entry:
11463  %0 = bitcast <2 x i64> %__a to <4 x i32>
11464  %load = load i32, ptr %__b
11465  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11466  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11467  %2 = icmp sge <4 x i32> %0, %1
11468  %3 = bitcast i8 %__u to <8 x i1>
11469  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11470  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so all lanes above the low four are zero.
11471  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11472  %6 = bitcast <64 x i1> %5 to i64
11473  ret i64 %6
11474}
11475
11476
; Unmasked register/register case: signed >= compare of %__a and %__b viewed
; as <8 x i32>; the <8 x i1> result is widened to 16 lanes and returned as i16.
; The assertions expect a 256-bit vpcmpnltd in the +avx512vl run, and a 512-bit
; vpcmpnltd followed by a kshiftlw/kshiftrw $8 pair in the +avx512f-only run.
11477define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11478; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11479; VLX:       # %bb.0: # %entry
11480; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
11481; VLX-NEXT:    kmovd %k0, %eax
11482; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11483; VLX-NEXT:    vzeroupper
11484; VLX-NEXT:    retq
11485;
11486; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11487; NoVLX:       # %bb.0: # %entry
11488; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11489; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11490; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11491; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11492; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11493; NoVLX-NEXT:    kmovw %k0, %eax
11494; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11495; NoVLX-NEXT:    vzeroupper
11496; NoVLX-NEXT:    retq
11497entry:
11498  %0 = bitcast <4 x i64> %__a to <8 x i32>
11499  %1 = bitcast <4 x i64> %__b to <8 x i32>
11500  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight lanes are zero.
11501  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11502  %4 = bitcast <16 x i1> %3 to i16
11503  ret i16 %4
11504}
11505
; Unmasked memory-operand case: the right-hand side is loaded from %__b as
; <4 x i64> and compared (signed >=) against %__a as <8 x i32>; the result is
; widened to 16 lanes and returned as i16.
; The assertions expect the load folded into vpcmpnltd in the +avx512vl run,
; and a separate vmovdqa load in the +avx512f-only run.
11506define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11507; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11508; VLX:       # %bb.0: # %entry
11509; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
11510; VLX-NEXT:    kmovd %k0, %eax
11511; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11512; VLX-NEXT:    vzeroupper
11513; VLX-NEXT:    retq
11514;
11515; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11516; NoVLX:       # %bb.0: # %entry
11517; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11518; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
11519; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11520; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11521; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11522; NoVLX-NEXT:    kmovw %k0, %eax
11523; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11524; NoVLX-NEXT:    vzeroupper
11525; NoVLX-NEXT:    retq
11526entry:
11527  %0 = bitcast <4 x i64> %__a to <8 x i32>
11528  %load = load <4 x i64>, ptr %__b
11529  %1 = bitcast <4 x i64> %load to <8 x i32>
11530  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight lanes are zero.
11531  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11532  %4 = bitcast <16 x i1> %3 to i16
11533  ret i16 %4
11534}
11535
; Masked register/register case: all eight bits of %__u are ANDed with the
; signed >= compare of %__a and %__b (as <8 x i32>); the result is widened to
; 16 lanes and returned as i16.
; The assertions expect %edi to be moved into %k1 and used as the write-mask
; of vpcmpnltd in both runs.
11536define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11537; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11538; VLX:       # %bb.0: # %entry
11539; VLX-NEXT:    kmovd %edi, %k1
11540; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11541; VLX-NEXT:    kmovd %k0, %eax
11542; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11543; VLX-NEXT:    vzeroupper
11544; VLX-NEXT:    retq
11545;
11546; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11547; NoVLX:       # %bb.0: # %entry
11548; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11549; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11550; NoVLX-NEXT:    kmovw %edi, %k1
11551; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11552; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11553; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11554; NoVLX-NEXT:    kmovw %k0, %eax
11555; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11556; NoVLX-NEXT:    vzeroupper
11557; NoVLX-NEXT:    retq
11558entry:
11559  %0 = bitcast <4 x i64> %__a to <8 x i32>
11560  %1 = bitcast <4 x i64> %__b to <8 x i32>
11561  %2 = icmp sge <8 x i32> %0, %1
11562  %3 = bitcast i8 %__u to <8 x i1>
11563  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight lanes are zero.
11564  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11565  %6 = bitcast <16 x i1> %5 to i16
11566  ret i16 %6
11567}
11568
; Masked memory-operand case: the right-hand side is loaded from %__b as
; <4 x i64>; all eight bits of %__u are ANDed with the signed >= compare
; (as <8 x i32>), and the result is widened to 16 lanes and returned as i16.
; The assertions expect the load folded into the masked vpcmpnltd in the
; +avx512vl run, and a separate vmovdqa load in the +avx512f-only run.
11569define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11570; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11571; VLX:       # %bb.0: # %entry
11572; VLX-NEXT:    kmovd %edi, %k1
11573; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11574; VLX-NEXT:    kmovd %k0, %eax
11575; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11576; VLX-NEXT:    vzeroupper
11577; VLX-NEXT:    retq
11578;
11579; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11580; NoVLX:       # %bb.0: # %entry
11581; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11582; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
11583; NoVLX-NEXT:    kmovw %edi, %k1
11584; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11585; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11586; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11587; NoVLX-NEXT:    kmovw %k0, %eax
11588; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11589; NoVLX-NEXT:    vzeroupper
11590; NoVLX-NEXT:    retq
11591entry:
11592  %0 = bitcast <4 x i64> %__a to <8 x i32>
11593  %load = load <4 x i64>, ptr %__b
11594  %1 = bitcast <4 x i64> %load to <8 x i32>
11595  %2 = icmp sge <8 x i32> %0, %1
11596  %3 = bitcast i8 %__u to <8 x i1>
11597  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 select lanes of the zeroinitializer operand, so the upper eight lanes are zero.
11598  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11599  %6 = bitcast <16 x i1> %5 to i16
11600  ret i16 %6
11601}
11602
11603
; Signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and splatted
; to every lane (insertelement followed by an all-zero shuffle mask). The
; <8 x i1> result is widened to <16 x i1> with zeroinitializer lanes (shuffle
; indices 8-15), so bits 8-15 of the returned i16 are zero. The checks expect
; the splat to fold into a broadcast memory operand ({1to8} with the VLX
; prefix, {1to16} with the NoVLX prefix).
11604define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11605; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11606; VLX:       # %bb.0: # %entry
11607; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11608; VLX-NEXT:    kmovd %k0, %eax
11609; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11610; VLX-NEXT:    vzeroupper
11611; VLX-NEXT:    retq
11612;
11613; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11614; NoVLX:       # %bb.0: # %entry
11615; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11616; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11617; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11618; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11619; NoVLX-NEXT:    kmovw %k0, %eax
11620; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11621; NoVLX-NEXT:    vzeroupper
11622; NoVLX-NEXT:    retq
11623entry:
11624  %0 = bitcast <4 x i64> %__a to <8 x i32>
11625  %load = load i32, ptr %__b
11626  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11627  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11628  %2 = icmp sge <8 x i32> %0, %1
11629  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11630  %4 = bitcast <16 x i1> %3 to i16
11631  ret i16 %4
11632}
11633
; Masked signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and
; splatted to every lane, ANDed lane-wise with the bits of %__u (here the mask
; is the first operand of the and). The <8 x i1> result is widened to
; <16 x i1> with zeroinitializer lanes (shuffle indices 8-15), so bits 8-15 of
; the returned i16 are zero. The checks expect a broadcast memory operand
; combined with a {%k1} write-mask.
11634define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11635; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11636; VLX:       # %bb.0: # %entry
11637; VLX-NEXT:    kmovd %edi, %k1
11638; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11639; VLX-NEXT:    kmovd %k0, %eax
11640; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11641; VLX-NEXT:    vzeroupper
11642; VLX-NEXT:    retq
11643;
11644; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11645; NoVLX:       # %bb.0: # %entry
11646; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11647; NoVLX-NEXT:    kmovw %edi, %k1
11648; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11649; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11650; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11651; NoVLX-NEXT:    kmovw %k0, %eax
11652; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11653; NoVLX-NEXT:    vzeroupper
11654; NoVLX-NEXT:    retq
11655entry:
11656  %0 = bitcast <4 x i64> %__a to <8 x i32>
11657  %load = load i32, ptr %__b
11658  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11659  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11660  %2 = icmp sge <8 x i32> %0, %1
11661  %3 = bitcast i8 %__u to <8 x i1>
11662  %4 = and <8 x i1> %3, %2
11663  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11664  %6 = bitcast <16 x i1> %5 to i16
11665  ret i16 %6
11666}
11667
11668
; Signed >= on the 8 x i32 lanes of %__a and %__b (both bitcast from
; <4 x i64>). The <8 x i1> result is widened to <32 x i1>; every shuffle index
; past lane 7 is in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-31
; of the returned i32 are zero. The NoVLX checks expect a zmm-wide compare
; followed by a kshiftlw/kshiftrw $8 pair.
11669define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11670; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11671; VLX:       # %bb.0: # %entry
11672; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
11673; VLX-NEXT:    kmovd %k0, %eax
11674; VLX-NEXT:    vzeroupper
11675; VLX-NEXT:    retq
11676;
11677; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11678; NoVLX:       # %bb.0: # %entry
11679; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11680; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11681; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11682; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11683; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11684; NoVLX-NEXT:    kmovw %k0, %eax
11685; NoVLX-NEXT:    vzeroupper
11686; NoVLX-NEXT:    retq
11687entry:
11688  %0 = bitcast <4 x i64> %__a to <8 x i32>
11689  %1 = bitcast <4 x i64> %__b to <8 x i32>
11690  %2 = icmp sge <8 x i32> %0, %1
11691  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11692  %4 = bitcast <32 x i1> %3 to i32
11693  ret i32 %4
11694}
11695
; Signed >= on 8 x i32 lanes: %__a against a <4 x i64> loaded from %__b. The
; <8 x i1> result is widened to <32 x i1>; every shuffle index past lane 7 is
; in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-31 of the returned
; i32 are zero. The VLX checks expect the load folded into the compare, while
; the NoVLX checks expect a separate vmovdqa before a zmm-wide compare.
11696define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11697; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11698; VLX:       # %bb.0: # %entry
11699; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
11700; VLX-NEXT:    kmovd %k0, %eax
11701; VLX-NEXT:    vzeroupper
11702; VLX-NEXT:    retq
11703;
11704; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11705; NoVLX:       # %bb.0: # %entry
11706; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11707; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
11708; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11709; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11710; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11711; NoVLX-NEXT:    kmovw %k0, %eax
11712; NoVLX-NEXT:    vzeroupper
11713; NoVLX-NEXT:    retq
11714entry:
11715  %0 = bitcast <4 x i64> %__a to <8 x i32>
11716  %load = load <4 x i64>, ptr %__b
11717  %1 = bitcast <4 x i64> %load to <8 x i32>
11718  %2 = icmp sge <8 x i32> %0, %1
11719  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11720  %4 = bitcast <32 x i1> %3 to i32
11721  ret i32 %4
11722}
11723
; Masked signed >= on the 8 x i32 lanes of %__a and %__b (both bitcast from
; <4 x i64>), ANDed lane-wise with the bits of %__u. The <8 x i1> result is
; widened to <32 x i1>; every shuffle index past lane 7 is in the 8-15 range,
; i.e. a zeroinitializer lane, so bits 8-31 of the returned i32 are zero. The
; checks expect %__u to be moved into %k1 and used as a write-mask.
11724define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11725; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11726; VLX:       # %bb.0: # %entry
11727; VLX-NEXT:    kmovd %edi, %k1
11728; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11729; VLX-NEXT:    kmovd %k0, %eax
11730; VLX-NEXT:    vzeroupper
11731; VLX-NEXT:    retq
11732;
11733; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11734; NoVLX:       # %bb.0: # %entry
11735; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11736; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11737; NoVLX-NEXT:    kmovw %edi, %k1
11738; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11739; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11740; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11741; NoVLX-NEXT:    kmovw %k0, %eax
11742; NoVLX-NEXT:    vzeroupper
11743; NoVLX-NEXT:    retq
11744entry:
11745  %0 = bitcast <4 x i64> %__a to <8 x i32>
11746  %1 = bitcast <4 x i64> %__b to <8 x i32>
11747  %2 = icmp sge <8 x i32> %0, %1
11748  %3 = bitcast i8 %__u to <8 x i1>
11749  %4 = and <8 x i1> %2, %3
11750  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11751  %6 = bitcast <32 x i1> %5 to i32
11752  ret i32 %6
11753}
11754
; Masked signed >= on 8 x i32 lanes: %__a against a <4 x i64> loaded from
; %__b, ANDed lane-wise with the bits of %__u. The <8 x i1> result is widened
; to <32 x i1>; every shuffle index past lane 7 is in the 8-15 range, i.e. a
; zeroinitializer lane, so bits 8-31 of the returned i32 are zero. The NoVLX
; checks expect a separate vmovdqa load ahead of the masked zmm-wide compare.
11755define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11756; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11757; VLX:       # %bb.0: # %entry
11758; VLX-NEXT:    kmovd %edi, %k1
11759; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11760; VLX-NEXT:    kmovd %k0, %eax
11761; VLX-NEXT:    vzeroupper
11762; VLX-NEXT:    retq
11763;
11764; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11765; NoVLX:       # %bb.0: # %entry
11766; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11767; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
11768; NoVLX-NEXT:    kmovw %edi, %k1
11769; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11770; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11771; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11772; NoVLX-NEXT:    kmovw %k0, %eax
11773; NoVLX-NEXT:    vzeroupper
11774; NoVLX-NEXT:    retq
11775entry:
11776  %0 = bitcast <4 x i64> %__a to <8 x i32>
11777  %load = load <4 x i64>, ptr %__b
11778  %1 = bitcast <4 x i64> %load to <8 x i32>
11779  %2 = icmp sge <8 x i32> %0, %1
11780  %3 = bitcast i8 %__u to <8 x i1>
11781  %4 = and <8 x i1> %2, %3
11782  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11783  %6 = bitcast <32 x i1> %5 to i32
11784  ret i32 %6
11785}
11786
11787
; Signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and splatted
; to every lane (insertelement followed by an all-zero shuffle mask). The
; <8 x i1> result is widened to <32 x i1>; every shuffle index past lane 7 is
; in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-31 of the returned
; i32 are zero. The checks expect the splat to fold into a broadcast memory
; operand ({1to8} with the VLX prefix, {1to16} with the NoVLX prefix).
11788define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11789; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11790; VLX:       # %bb.0: # %entry
11791; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11792; VLX-NEXT:    kmovd %k0, %eax
11793; VLX-NEXT:    vzeroupper
11794; VLX-NEXT:    retq
11795;
11796; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11797; NoVLX:       # %bb.0: # %entry
11798; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11799; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11800; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11801; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11802; NoVLX-NEXT:    kmovw %k0, %eax
11803; NoVLX-NEXT:    vzeroupper
11804; NoVLX-NEXT:    retq
11805entry:
11806  %0 = bitcast <4 x i64> %__a to <8 x i32>
11807  %load = load i32, ptr %__b
11808  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11809  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11810  %2 = icmp sge <8 x i32> %0, %1
11811  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11812  %4 = bitcast <32 x i1> %3 to i32
11813  ret i32 %4
11814}
11815
; Masked signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and
; splatted to every lane, ANDed lane-wise with the bits of %__u (here the mask
; is the first operand of the and). The <8 x i1> result is widened to
; <32 x i1>; every shuffle index past lane 7 is in the 8-15 range, i.e. a
; zeroinitializer lane, so bits 8-31 of the returned i32 are zero. The checks
; expect a broadcast memory operand combined with a {%k1} write-mask.
11816define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11817; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11818; VLX:       # %bb.0: # %entry
11819; VLX-NEXT:    kmovd %edi, %k1
11820; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11821; VLX-NEXT:    kmovd %k0, %eax
11822; VLX-NEXT:    vzeroupper
11823; VLX-NEXT:    retq
11824;
11825; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11826; NoVLX:       # %bb.0: # %entry
11827; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11828; NoVLX-NEXT:    kmovw %edi, %k1
11829; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11830; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11831; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11832; NoVLX-NEXT:    kmovw %k0, %eax
11833; NoVLX-NEXT:    vzeroupper
11834; NoVLX-NEXT:    retq
11835entry:
11836  %0 = bitcast <4 x i64> %__a to <8 x i32>
11837  %load = load i32, ptr %__b
11838  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11839  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11840  %2 = icmp sge <8 x i32> %0, %1
11841  %3 = bitcast i8 %__u to <8 x i1>
11842  %4 = and <8 x i1> %3, %2
11843  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11844  %6 = bitcast <32 x i1> %5 to i32
11845  ret i32 %6
11846}
11847
11848
; Signed >= on the 8 x i32 lanes of %__a and %__b (both bitcast from
; <4 x i64>). The <8 x i1> result is widened to <64 x i1>; every shuffle index
; past lane 7 is in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-63
; of the returned i64 are zero. The VLX checks read the mask with kmovq, while
; the NoVLX checks use kmovw into %eax after a kshiftlw/kshiftrw $8 pair.
11849define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11850; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11851; VLX:       # %bb.0: # %entry
11852; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
11853; VLX-NEXT:    kmovq %k0, %rax
11854; VLX-NEXT:    vzeroupper
11855; VLX-NEXT:    retq
11856;
11857; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11858; NoVLX:       # %bb.0: # %entry
11859; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11860; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11861; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11862; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11863; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11864; NoVLX-NEXT:    kmovw %k0, %eax
11865; NoVLX-NEXT:    vzeroupper
11866; NoVLX-NEXT:    retq
11867entry:
11868  %0 = bitcast <4 x i64> %__a to <8 x i32>
11869  %1 = bitcast <4 x i64> %__b to <8 x i32>
11870  %2 = icmp sge <8 x i32> %0, %1
11871  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11872  %4 = bitcast <64 x i1> %3 to i64
11873  ret i64 %4
11874}
11875
; Signed >= on 8 x i32 lanes: %__a against a <4 x i64> loaded from %__b. The
; <8 x i1> result is widened to <64 x i1>; every shuffle index past lane 7 is
; in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-63 of the returned
; i64 are zero. The VLX checks expect the load folded into the compare, while
; the NoVLX checks expect a separate vmovdqa before a zmm-wide compare.
11876define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11877; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11878; VLX:       # %bb.0: # %entry
11879; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
11880; VLX-NEXT:    kmovq %k0, %rax
11881; VLX-NEXT:    vzeroupper
11882; VLX-NEXT:    retq
11883;
11884; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11885; NoVLX:       # %bb.0: # %entry
11886; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11887; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
11888; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11889; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11890; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11891; NoVLX-NEXT:    kmovw %k0, %eax
11892; NoVLX-NEXT:    vzeroupper
11893; NoVLX-NEXT:    retq
11894entry:
11895  %0 = bitcast <4 x i64> %__a to <8 x i32>
11896  %load = load <4 x i64>, ptr %__b
11897  %1 = bitcast <4 x i64> %load to <8 x i32>
11898  %2 = icmp sge <8 x i32> %0, %1
11899  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11900  %4 = bitcast <64 x i1> %3 to i64
11901  ret i64 %4
11902}
11903
; Masked signed >= on the 8 x i32 lanes of %__a and %__b (both bitcast from
; <4 x i64>), ANDed lane-wise with the bits of %__u. The <8 x i1> result is
; widened to <64 x i1>; every shuffle index past lane 7 is in the 8-15 range,
; i.e. a zeroinitializer lane, so bits 8-63 of the returned i64 are zero. The
; checks expect %__u to be moved into %k1 and used as a write-mask.
11904define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11905; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11906; VLX:       # %bb.0: # %entry
11907; VLX-NEXT:    kmovd %edi, %k1
11908; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11909; VLX-NEXT:    kmovq %k0, %rax
11910; VLX-NEXT:    vzeroupper
11911; VLX-NEXT:    retq
11912;
11913; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11914; NoVLX:       # %bb.0: # %entry
11915; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11916; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11917; NoVLX-NEXT:    kmovw %edi, %k1
11918; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11919; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11920; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11921; NoVLX-NEXT:    kmovw %k0, %eax
11922; NoVLX-NEXT:    vzeroupper
11923; NoVLX-NEXT:    retq
11924entry:
11925  %0 = bitcast <4 x i64> %__a to <8 x i32>
11926  %1 = bitcast <4 x i64> %__b to <8 x i32>
11927  %2 = icmp sge <8 x i32> %0, %1
11928  %3 = bitcast i8 %__u to <8 x i1>
11929  %4 = and <8 x i1> %2, %3
11930  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11931  %6 = bitcast <64 x i1> %5 to i64
11932  ret i64 %6
11933}
11934
; Masked signed >= on 8 x i32 lanes: %__a against a <4 x i64> loaded from
; %__b, ANDed lane-wise with the bits of %__u. The <8 x i1> result is widened
; to <64 x i1>; every shuffle index past lane 7 is in the 8-15 range, i.e. a
; zeroinitializer lane, so bits 8-63 of the returned i64 are zero. The NoVLX
; checks expect a separate vmovdqa load ahead of the masked zmm-wide compare.
11935define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11936; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11937; VLX:       # %bb.0: # %entry
11938; VLX-NEXT:    kmovd %edi, %k1
11939; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11940; VLX-NEXT:    kmovq %k0, %rax
11941; VLX-NEXT:    vzeroupper
11942; VLX-NEXT:    retq
11943;
11944; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11945; NoVLX:       # %bb.0: # %entry
11946; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11947; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
11948; NoVLX-NEXT:    kmovw %edi, %k1
11949; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11950; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11951; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11952; NoVLX-NEXT:    kmovw %k0, %eax
11953; NoVLX-NEXT:    vzeroupper
11954; NoVLX-NEXT:    retq
11955entry:
11956  %0 = bitcast <4 x i64> %__a to <8 x i32>
11957  %load = load <4 x i64>, ptr %__b
11958  %1 = bitcast <4 x i64> %load to <8 x i32>
11959  %2 = icmp sge <8 x i32> %0, %1
11960  %3 = bitcast i8 %__u to <8 x i1>
11961  %4 = and <8 x i1> %2, %3
11962  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11963  %6 = bitcast <64 x i1> %5 to i64
11964  ret i64 %6
11965}
11966
11967
; Signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and splatted
; to every lane (insertelement followed by an all-zero shuffle mask). The
; <8 x i1> result is widened to <64 x i1>; every shuffle index past lane 7 is
; in the 8-15 range, i.e. a zeroinitializer lane, so bits 8-63 of the returned
; i64 are zero. The checks expect the splat to fold into a broadcast memory
; operand ({1to8} with the VLX prefix, {1to16} with the NoVLX prefix).
11968define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11969; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11970; VLX:       # %bb.0: # %entry
11971; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11972; VLX-NEXT:    kmovq %k0, %rax
11973; VLX-NEXT:    vzeroupper
11974; VLX-NEXT:    retq
11975;
11976; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11977; NoVLX:       # %bb.0: # %entry
11978; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11979; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11980; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11981; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11982; NoVLX-NEXT:    kmovw %k0, %eax
11983; NoVLX-NEXT:    vzeroupper
11984; NoVLX-NEXT:    retq
11985entry:
11986  %0 = bitcast <4 x i64> %__a to <8 x i32>
11987  %load = load i32, ptr %__b
11988  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11989  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11990  %2 = icmp sge <8 x i32> %0, %1
11991  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11992  %4 = bitcast <64 x i1> %3 to i64
11993  ret i64 %4
11994}
11995
; Masked signed >= on 8 x i32 lanes: %__a against an i32 loaded from %__b and
; splatted to every lane, ANDed lane-wise with the bits of %__u (here the mask
; is the first operand of the and). The <8 x i1> result is widened to
; <64 x i1>; every shuffle index past lane 7 is in the 8-15 range, i.e. a
; zeroinitializer lane, so bits 8-63 of the returned i64 are zero. The checks
; expect a broadcast memory operand combined with a {%k1} write-mask.
11996define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11997; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
11998; VLX:       # %bb.0: # %entry
11999; VLX-NEXT:    kmovd %edi, %k1
12000; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12001; VLX-NEXT:    kmovq %k0, %rax
12002; VLX-NEXT:    vzeroupper
12003; VLX-NEXT:    retq
12004;
12005; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12006; NoVLX:       # %bb.0: # %entry
12007; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12008; NoVLX-NEXT:    kmovw %edi, %k1
12009; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12010; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12011; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12012; NoVLX-NEXT:    kmovw %k0, %eax
12013; NoVLX-NEXT:    vzeroupper
12014; NoVLX-NEXT:    retq
12015entry:
12016  %0 = bitcast <4 x i64> %__a to <8 x i32>
12017  %load = load i32, ptr %__b
12018  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12019  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12020  %2 = icmp sge <8 x i32> %0, %1
12021  %3 = bitcast i8 %__u to <8 x i1>
12022  %4 = and <8 x i1> %3, %2
12023  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12024  %6 = bitcast <64 x i1> %5 to i64
12025  ret i64 %6
12026}
12027
12028
; Signed >= on the 16 x i32 lanes of %__a and %__b (both bitcast from
; <8 x i64>). The <16 x i1> result is widened to <32 x i1>; shuffle indices
; 16-31 select zeroinitializer lanes, so bits 16-31 of the returned i32 are
; zero. Both check prefixes expect a single zmm compare; they differ only in
; the mask move (kmovd versus kmovw).
12029define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12030; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12031; VLX:       # %bb.0: # %entry
12032; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12033; VLX-NEXT:    kmovd %k0, %eax
12034; VLX-NEXT:    vzeroupper
12035; VLX-NEXT:    retq
12036;
12037; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12038; NoVLX:       # %bb.0: # %entry
12039; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12040; NoVLX-NEXT:    kmovw %k0, %eax
12041; NoVLX-NEXT:    vzeroupper
12042; NoVLX-NEXT:    retq
12043entry:
12044  %0 = bitcast <8 x i64> %__a to <16 x i32>
12045  %1 = bitcast <8 x i64> %__b to <16 x i32>
12046  %2 = icmp sge <16 x i32> %0, %1
12047  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12048  %4 = bitcast <32 x i1> %3 to i32
12049  ret i32 %4
12050}
12051
; Signed >= on 16 x i32 lanes: %__a against an <8 x i64> loaded from %__b. The
; <16 x i1> result is widened to <32 x i1>; shuffle indices 16-31 select
; zeroinitializer lanes, so bits 16-31 of the returned i32 are zero. Both check
; prefixes expect the load to be folded into the zmm compare.
12052define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12053; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12054; VLX:       # %bb.0: # %entry
12055; VLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12056; VLX-NEXT:    kmovd %k0, %eax
12057; VLX-NEXT:    vzeroupper
12058; VLX-NEXT:    retq
12059;
12060; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12061; NoVLX:       # %bb.0: # %entry
12062; NoVLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12063; NoVLX-NEXT:    kmovw %k0, %eax
12064; NoVLX-NEXT:    vzeroupper
12065; NoVLX-NEXT:    retq
12066entry:
12067  %0 = bitcast <8 x i64> %__a to <16 x i32>
12068  %load = load <8 x i64>, ptr %__b
12069  %1 = bitcast <8 x i64> %load to <16 x i32>
12070  %2 = icmp sge <16 x i32> %0, %1
12071  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12072  %4 = bitcast <32 x i1> %3 to i32
12073  ret i32 %4
12074}
12075
; Masked signed >= on the 16 x i32 lanes of %__a and %__b (both bitcast from
; <8 x i64>), ANDed lane-wise with the bits of %__u. The <16 x i1> result is
; widened to <32 x i1>; shuffle indices 16-31 select zeroinitializer lanes, so
; bits 16-31 of the returned i32 are zero. The VLX checks expect a {%k1}
; write-mask, whereas the NoVLX checks expect the mask to be applied with a
; scalar andl on the kmovw result.
12076define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12077; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12078; VLX:       # %bb.0: # %entry
12079; VLX-NEXT:    kmovd %edi, %k1
12080; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12081; VLX-NEXT:    kmovd %k0, %eax
12082; VLX-NEXT:    vzeroupper
12083; VLX-NEXT:    retq
12084;
12085; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12086; NoVLX:       # %bb.0: # %entry
12087; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12088; NoVLX-NEXT:    kmovw %k0, %eax
12089; NoVLX-NEXT:    andl %edi, %eax
12090; NoVLX-NEXT:    vzeroupper
12091; NoVLX-NEXT:    retq
12092entry:
12093  %0 = bitcast <8 x i64> %__a to <16 x i32>
12094  %1 = bitcast <8 x i64> %__b to <16 x i32>
12095  %2 = icmp sge <16 x i32> %0, %1
12096  %3 = bitcast i16 %__u to <16 x i1>
12097  %4 = and <16 x i1> %2, %3
12098  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12099  %6 = bitcast <32 x i1> %5 to i32
12100  ret i32 %6
12101}
12102
; Masked signed >= on 16 x i32 lanes: %__a against an <8 x i64> loaded from
; %__b, ANDed lane-wise with the bits of %__u. The <16 x i1> result is widened
; to <32 x i1>; shuffle indices 16-31 select zeroinitializer lanes, so bits
; 16-31 of the returned i32 are zero. The VLX checks expect a {%k1}
; write-mask, whereas the NoVLX checks expect the mask to be applied with a
; scalar andl on the kmovw result.
12103define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12104; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12105; VLX:       # %bb.0: # %entry
12106; VLX-NEXT:    kmovd %edi, %k1
12107; VLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12108; VLX-NEXT:    kmovd %k0, %eax
12109; VLX-NEXT:    vzeroupper
12110; VLX-NEXT:    retq
12111;
12112; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12113; NoVLX:       # %bb.0: # %entry
12114; NoVLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0
12115; NoVLX-NEXT:    kmovw %k0, %eax
12116; NoVLX-NEXT:    andl %edi, %eax
12117; NoVLX-NEXT:    vzeroupper
12118; NoVLX-NEXT:    retq
12119entry:
12120  %0 = bitcast <8 x i64> %__a to <16 x i32>
12121  %load = load <8 x i64>, ptr %__b
12122  %1 = bitcast <8 x i64> %load to <16 x i32>
12123  %2 = icmp sge <16 x i32> %0, %1
12124  %3 = bitcast i16 %__u to <16 x i1>
12125  %4 = and <16 x i1> %2, %3
12126  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12127  %6 = bitcast <32 x i1> %5 to i32
12128  ret i32 %6
12129}
12130
12131
; Signed >= on 16 x i32 lanes: %__a against an i32 loaded from %__b and
; splatted to every lane (insertelement followed by an all-zero shuffle mask).
; The <16 x i1> result is widened to <32 x i1>; shuffle indices 16-31 select
; zeroinitializer lanes, so bits 16-31 of the returned i32 are zero. Both check
; prefixes expect the splat to fold into a {1to16} broadcast memory operand.
12132define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12133; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12134; VLX:       # %bb.0: # %entry
12135; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12136; VLX-NEXT:    kmovd %k0, %eax
12137; VLX-NEXT:    vzeroupper
12138; VLX-NEXT:    retq
12139;
12140; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12141; NoVLX:       # %bb.0: # %entry
12142; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12143; NoVLX-NEXT:    kmovw %k0, %eax
12144; NoVLX-NEXT:    vzeroupper
12145; NoVLX-NEXT:    retq
12146entry:
12147  %0 = bitcast <8 x i64> %__a to <16 x i32>
12148  %load = load i32, ptr %__b
12149  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12150  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12151  %2 = icmp sge <16 x i32> %0, %1
12152  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12153  %4 = bitcast <32 x i1> %3 to i32
12154  ret i32 %4
12155}
12156
; Masked signed >= on 16 x i32 lanes: %__a against an i32 loaded from %__b and
; splatted to every lane, ANDed lane-wise with the bits of %__u (here the mask
; is the first operand of the and). The <16 x i1> result is widened to
; <32 x i1>; shuffle indices 16-31 select zeroinitializer lanes, so bits 16-31
; of the returned i32 are zero. The VLX checks expect a {%k1} write-mask,
; whereas the NoVLX checks expect a scalar andl on the kmovw result.
12157define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12158; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12159; VLX:       # %bb.0: # %entry
12160; VLX-NEXT:    kmovd %edi, %k1
12161; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12162; VLX-NEXT:    kmovd %k0, %eax
12163; VLX-NEXT:    vzeroupper
12164; VLX-NEXT:    retq
12165;
12166; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12167; NoVLX:       # %bb.0: # %entry
12168; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12169; NoVLX-NEXT:    kmovw %k0, %eax
12170; NoVLX-NEXT:    andl %edi, %eax
12171; NoVLX-NEXT:    vzeroupper
12172; NoVLX-NEXT:    retq
12173entry:
12174  %0 = bitcast <8 x i64> %__a to <16 x i32>
12175  %load = load i32, ptr %__b
12176  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12177  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12178  %2 = icmp sge <16 x i32> %0, %1
12179  %3 = bitcast i16 %__u to <16 x i1>
12180  %4 = and <16 x i1> %3, %2
12181  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12182  %6 = bitcast <32 x i1> %5 to i32
12183  ret i32 %6
12184}
12185
12186
; Signed >= on the 16 x i32 lanes of %__a and %__b (both bitcast from
; <8 x i64>). The <16 x i1> result is widened to <64 x i1>; every shuffle index
; past lane 15 is in the 16-31 range, i.e. a zeroinitializer lane, so bits
; 16-63 of the returned i64 are zero. The VLX checks read the mask with kmovq,
; while the NoVLX checks use kmovw into %eax.
12187define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12188; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12189; VLX:       # %bb.0: # %entry
12190; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12191; VLX-NEXT:    kmovq %k0, %rax
12192; VLX-NEXT:    vzeroupper
12193; VLX-NEXT:    retq
12194;
12195; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12196; NoVLX:       # %bb.0: # %entry
12197; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12198; NoVLX-NEXT:    kmovw %k0, %eax
12199; NoVLX-NEXT:    vzeroupper
12200; NoVLX-NEXT:    retq
12201entry:
12202  %0 = bitcast <8 x i64> %__a to <16 x i32>
12203  %1 = bitcast <8 x i64> %__b to <16 x i32>
12204  %2 = icmp sge <16 x i32> %0, %1
12205  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12206  %4 = bitcast <64 x i1> %3 to i64
12207  ret i64 %4
12208}
12209
; Memory-operand variant of the <16 x i32> signed >= compare: the right-hand
; side is a full <8 x i64> load from %__b. The 16 result lanes are padded with
; zeroinitializer lanes (shuffle indices >= 16) up to <64 x i1> and returned
; as an i64. Both run lines expect the load folded into vpcmpnltd as (%rdi).
12210define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12211; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12212; VLX:       # %bb.0: # %entry
12213; VLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12214; VLX-NEXT:    kmovq %k0, %rax
12215; VLX-NEXT:    vzeroupper
12216; VLX-NEXT:    retq
12217;
12218; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12219; NoVLX:       # %bb.0: # %entry
12220; NoVLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12221; NoVLX-NEXT:    kmovw %k0, %eax
12222; NoVLX-NEXT:    vzeroupper
12223; NoVLX-NEXT:    retq
12224entry:
12225  %0 = bitcast <8 x i64> %__a to <16 x i32>
12226  %load = load <8 x i64>, ptr %__b
12227  %1 = bitcast <8 x i64> %load to <16 x i32>
12228  %2 = icmp sge <16 x i32> %0, %1
12229  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12230  %4 = bitcast <64 x i1> %3 to i64
12231  ret i64 %4
12232}
12233
; Masked <16 x i32> signed >= compare with register operands. %__u is bitcast
; to <16 x i1> and ANDed with the compare result (compare first, mask second)
; before the result is padded with zero lanes to <64 x i1> and returned as i64.
; With the full feature set the mask is expected as a {%k1} write-mask on
; vpcmpnltd; with avx512f alone the expected code applies it afterwards with a
; scalar andl against %edi.
12234define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12235; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12236; VLX:       # %bb.0: # %entry
12237; VLX-NEXT:    kmovd %edi, %k1
12238; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12239; VLX-NEXT:    kmovq %k0, %rax
12240; VLX-NEXT:    vzeroupper
12241; VLX-NEXT:    retq
12242;
12243; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12244; NoVLX:       # %bb.0: # %entry
12245; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12246; NoVLX-NEXT:    kmovw %k0, %eax
12247; NoVLX-NEXT:    andl %edi, %eax
12248; NoVLX-NEXT:    vzeroupper
12249; NoVLX-NEXT:    retq
12250entry:
12251  %0 = bitcast <8 x i64> %__a to <16 x i32>
12252  %1 = bitcast <8 x i64> %__b to <16 x i32>
12253  %2 = icmp sge <16 x i32> %0, %1
12254  %3 = bitcast i16 %__u to <16 x i1>
12255  %4 = and <16 x i1> %2, %3
12256  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12257  %6 = bitcast <64 x i1> %5 to i64
12258  ret i64 %6
12259}
12260
; Masked <16 x i32> signed >= compare whose right-hand side is a full
; <8 x i64> load from %__b. The compare result is ANDed with %__u (bitcast to
; <16 x i1>), padded with zero lanes to <64 x i1> and returned as i64.
; Both run lines expect the load folded as (%rsi); the mask is expected as a
; {%k1} write-mask with the full feature set and as a trailing andl with
; avx512f alone.
12261define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12262; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12263; VLX:       # %bb.0: # %entry
12264; VLX-NEXT:    kmovd %edi, %k1
12265; VLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12266; VLX-NEXT:    kmovq %k0, %rax
12267; VLX-NEXT:    vzeroupper
12268; VLX-NEXT:    retq
12269;
12270; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12271; NoVLX:       # %bb.0: # %entry
12272; NoVLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0
12273; NoVLX-NEXT:    kmovw %k0, %eax
12274; NoVLX-NEXT:    andl %edi, %eax
12275; NoVLX-NEXT:    vzeroupper
12276; NoVLX-NEXT:    retq
12277entry:
12278  %0 = bitcast <8 x i64> %__a to <16 x i32>
12279  %load = load <8 x i64>, ptr %__b
12280  %1 = bitcast <8 x i64> %load to <16 x i32>
12281  %2 = icmp sge <16 x i32> %0, %1
12282  %3 = bitcast i16 %__u to <16 x i1>
12283  %4 = and <16 x i1> %2, %3
12284  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12285  %6 = bitcast <64 x i1> %5 to i64
12286  ret i64 %6
12287}
12288
12289
; Broadcast variant of the <16 x i32> signed >= compare: a single i32 is
; loaded from %__b and splatted to all 16 lanes (insertelement into lane 0,
; then a shufflevector with an all-zero index mask). The result is padded with
; zero lanes to <64 x i1> and returned as i64. Both run lines expect the splat
; folded into vpcmpnltd as an embedded broadcast, (%rdi){1to16}.
12290define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12291; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12292; VLX:       # %bb.0: # %entry
12293; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12294; VLX-NEXT:    kmovq %k0, %rax
12295; VLX-NEXT:    vzeroupper
12296; VLX-NEXT:    retq
12297;
12298; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12299; NoVLX:       # %bb.0: # %entry
12300; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12301; NoVLX-NEXT:    kmovw %k0, %eax
12302; NoVLX-NEXT:    vzeroupper
12303; NoVLX-NEXT:    retq
12304entry:
12305  %0 = bitcast <8 x i64> %__a to <16 x i32>
12306  %load = load i32, ptr %__b
12307  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12308  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12309  %2 = icmp sge <16 x i32> %0, %1
12310  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12311  %4 = bitcast <64 x i1> %3 to i64
12312  ret i64 %4
12313}
12314
; Masked broadcast variant of the <16 x i32> signed >= compare: one i32 is
; loaded from %__b and splatted to all 16 lanes, the compare result is ANDed
; with %__u (bitcast to <16 x i1>), padded with zero lanes to <64 x i1> and
; returned as i64. Note the `and` here takes the mask as its first operand,
; unlike the non-broadcast masked tests in this group.
; Both run lines expect the (%rsi){1to16} embedded broadcast; the mask is
; expected as a {%k1} write-mask with the full feature set and as a trailing
; andl with avx512f alone.
12315define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12316; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12317; VLX:       # %bb.0: # %entry
12318; VLX-NEXT:    kmovd %edi, %k1
12319; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12320; VLX-NEXT:    kmovq %k0, %rax
12321; VLX-NEXT:    vzeroupper
12322; VLX-NEXT:    retq
12323;
12324; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12325; NoVLX:       # %bb.0: # %entry
12326; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12327; NoVLX-NEXT:    kmovw %k0, %eax
12328; NoVLX-NEXT:    andl %edi, %eax
12329; NoVLX-NEXT:    vzeroupper
12330; NoVLX-NEXT:    retq
12331entry:
12332  %0 = bitcast <8 x i64> %__a to <16 x i32>
12333  %load = load i32, ptr %__b
12334  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12335  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12336  %2 = icmp sge <16 x i32> %0, %1
12337  %3 = bitcast i16 %__u to <16 x i1>
12338  %4 = and <16 x i1> %3, %2
12339  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12340  %6 = bitcast <64 x i1> %5 to i64
12341  ret i64 %6
12342}
12343
12344
; Signed >= compare (icmp sge) of two <2 x i64> register arguments. The two
; result lanes are widened to <4 x i1> (shuffle indices 2 and 3 pick lanes of
; the zeroinitializer operand) and returned as an i4. The two bitcasts are
; identity casts (<2 x i64> to <2 x i64>).
; With the full feature set the expected code is an xmm vpcmpnltq plus kmovb;
; with avx512f alone the expected code compares in zmm registers and keeps
; only the two low mask bits with andl $3.
12345define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12346; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12347; VLX:       # %bb.0: # %entry
12348; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12349; VLX-NEXT:    kmovb %k0, %eax
12350; VLX-NEXT:    retq
12351;
12352; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12353; NoVLX:       # %bb.0: # %entry
12354; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12355; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12356; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12357; NoVLX-NEXT:    kmovw %k0, %eax
12358; NoVLX-NEXT:    andl $3, %eax
12359; NoVLX-NEXT:    vzeroupper
12360; NoVLX-NEXT:    retq
12361entry:
12362  %0 = bitcast <2 x i64> %__a to <2 x i64>
12363  %1 = bitcast <2 x i64> %__b to <2 x i64>
12364  %2 = icmp sge <2 x i64> %0, %1
12365  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12366  %4 = bitcast <4 x i1> %3 to i4
12367  ret i4 %4
12368}
12369
; Memory-operand variant of the <2 x i64> signed >= compare: the right-hand
; side is a <2 x i64> load from %__b. The two result lanes are padded with
; zeroinitializer lanes to <4 x i1> and returned as an i4.
; With the full feature set the load is expected folded into an xmm vpcmpnltq;
; with avx512f alone the expected code loads with a separate vmovdqa, compares
; in zmm registers and masks the result with andl $3.
12370define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12371; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12372; VLX:       # %bb.0: # %entry
12373; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12374; VLX-NEXT:    kmovb %k0, %eax
12375; VLX-NEXT:    retq
12376;
12377; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12378; NoVLX:       # %bb.0: # %entry
12379; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12380; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12381; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12382; NoVLX-NEXT:    kmovw %k0, %eax
12383; NoVLX-NEXT:    andl $3, %eax
12384; NoVLX-NEXT:    vzeroupper
12385; NoVLX-NEXT:    retq
12386entry:
12387  %0 = bitcast <2 x i64> %__a to <2 x i64>
12388  %load = load <2 x i64>, ptr %__b
12389  %1 = bitcast <2 x i64> %load to <2 x i64>
12390  %2 = icmp sge <2 x i64> %0, %1
12391  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12392  %4 = bitcast <4 x i1> %3 to i4
12393  ret i4 %4
12394}
12395
; Masked <2 x i64> signed >= compare with register operands. %__u is bitcast
; to <8 x i1>, its lanes 0 and 1 are extracted with a shufflevector, and that
; <2 x i1> is ANDed with the compare result. The result is padded with
; zeroinitializer lanes to <4 x i1> and returned as an i4.
; Both run lines expect the mask applied as a {%k1} write-mask on vpcmpnltq
; (xmm with the full feature set; zmm followed by andl $3 with avx512f alone).
12396define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12397; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12398; VLX:       # %bb.0: # %entry
12399; VLX-NEXT:    kmovd %edi, %k1
12400; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12401; VLX-NEXT:    kmovb %k0, %eax
12402; VLX-NEXT:    retq
12403;
12404; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12405; NoVLX:       # %bb.0: # %entry
12406; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12407; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12408; NoVLX-NEXT:    kmovw %edi, %k1
12409; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12410; NoVLX-NEXT:    kmovw %k0, %eax
12411; NoVLX-NEXT:    andl $3, %eax
12412; NoVLX-NEXT:    vzeroupper
12413; NoVLX-NEXT:    retq
12414entry:
12415  %0 = bitcast <2 x i64> %__a to <2 x i64>
12416  %1 = bitcast <2 x i64> %__b to <2 x i64>
12417  %2 = icmp sge <2 x i64> %0, %1
12418  %3 = bitcast i8 %__u to <8 x i1>
12419  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12420  %4 = and <2 x i1> %2, %extract.i
12421  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12422  %6 = bitcast <4 x i1> %5 to i4
12423  ret i4 %6
12424}
12425
; Masked <2 x i64> signed >= compare whose right-hand side is a <2 x i64>
; load from %__b. Lanes 0 and 1 of %__u (bitcast to <8 x i1>) are ANDed with
; the compare result, which is then padded with zeroinitializer lanes to
; <4 x i1> and returned as an i4.
; With the full feature set the load is expected folded as (%rsi) under a
; {%k1} write-mask; with avx512f alone the expected code uses a separate
; vmovdqa load, a write-masked zmm compare and andl $3.
12426define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12427; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12428; VLX:       # %bb.0: # %entry
12429; VLX-NEXT:    kmovd %edi, %k1
12430; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12431; VLX-NEXT:    kmovb %k0, %eax
12432; VLX-NEXT:    retq
12433;
12434; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12435; NoVLX:       # %bb.0: # %entry
12436; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12437; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12438; NoVLX-NEXT:    kmovw %edi, %k1
12439; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12440; NoVLX-NEXT:    kmovw %k0, %eax
12441; NoVLX-NEXT:    andl $3, %eax
12442; NoVLX-NEXT:    vzeroupper
12443; NoVLX-NEXT:    retq
12444entry:
12445  %0 = bitcast <2 x i64> %__a to <2 x i64>
12446  %load = load <2 x i64>, ptr %__b
12447  %1 = bitcast <2 x i64> %load to <2 x i64>
12448  %2 = icmp sge <2 x i64> %0, %1
12449  %3 = bitcast i8 %__u to <8 x i1>
12450  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12451  %4 = and <2 x i1> %2, %extract.i
12452  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12453  %6 = bitcast <4 x i1> %5 to i4
12454  ret i4 %6
12455}
12456
12457
; Broadcast variant of the <2 x i64> signed >= compare: one i64 is loaded
; from %__b and splatted to both lanes (insertelement into lane 0, then a
; shufflevector with indices <0, 0>). The two result lanes are padded with
; zeroinitializer lanes to <4 x i1> and returned as an i4.
; The expected embedded broadcast is (%rdi){1to2} on xmm with the full feature
; set and (%rdi){1to8} on zmm, followed by andl $3, with avx512f alone.
12458define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12459; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12460; VLX:       # %bb.0: # %entry
12461; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12462; VLX-NEXT:    kmovb %k0, %eax
12463; VLX-NEXT:    retq
12464;
12465; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12466; NoVLX:       # %bb.0: # %entry
12467; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12468; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12469; NoVLX-NEXT:    kmovw %k0, %eax
12470; NoVLX-NEXT:    andl $3, %eax
12471; NoVLX-NEXT:    vzeroupper
12472; NoVLX-NEXT:    retq
12473entry:
12474  %0 = bitcast <2 x i64> %__a to <2 x i64>
12475  %load = load i64, ptr %__b
12476  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12477  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12478  %2 = icmp sge <2 x i64> %0, %1
12479  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12480  %4 = bitcast <4 x i1> %3 to i4
12481  ret i4 %4
12482}
12483
; Masked broadcast variant of the <2 x i64> signed >= compare: one i64 is
; loaded from %__b and splatted to both lanes; lanes 0 and 1 of %__u (bitcast
; to <8 x i1>) are ANDed with the compare result, mask operand first. The
; result is padded with zeroinitializer lanes to <4 x i1> and returned as i4.
; Both run lines expect a {%k1} write-masked vpcmpnltq with an embedded
; broadcast: (%rsi){1to2} on xmm with the full feature set, (%rsi){1to8} on
; zmm followed by andl $3 with avx512f alone.
12484define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12485; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12486; VLX:       # %bb.0: # %entry
12487; VLX-NEXT:    kmovd %edi, %k1
12488; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12489; VLX-NEXT:    kmovb %k0, %eax
12490; VLX-NEXT:    retq
12491;
12492; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12493; NoVLX:       # %bb.0: # %entry
12494; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12495; NoVLX-NEXT:    kmovw %edi, %k1
12496; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12497; NoVLX-NEXT:    kmovw %k0, %eax
12498; NoVLX-NEXT:    andl $3, %eax
12499; NoVLX-NEXT:    vzeroupper
12500; NoVLX-NEXT:    retq
12501entry:
12502  %0 = bitcast <2 x i64> %__a to <2 x i64>
12503  %load = load i64, ptr %__b
12504  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12505  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12506  %2 = icmp sge <2 x i64> %0, %1
12507  %3 = bitcast i8 %__u to <8 x i1>
12508  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12509  %4 = and <2 x i1> %extract.i, %2
12510  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12511  %6 = bitcast <4 x i1> %5 to i4
12512  ret i4 %6
12513}
12514
12515
; Signed >= compare of two <2 x i64> register arguments, widened to <8 x i1>
; and returned as an i8. Shuffle indices 0 and 1 pick the compare lanes; the
; repeated indices 2 and 3 all pick lanes of the zeroinitializer operand, so
; bits 2-7 of the result are zero.
; With the full feature set the expected code is an xmm vpcmpnltq plus kmovd;
; with avx512f alone the expected code compares in zmm registers and clears
; all but the two low mask bits with a kshiftlw $14 / kshiftrw $14 pair.
12516define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12517; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12518; VLX:       # %bb.0: # %entry
12519; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12520; VLX-NEXT:    kmovd %k0, %eax
12521; VLX-NEXT:    # kill: def $al killed $al killed $eax
12522; VLX-NEXT:    retq
12523;
12524; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12525; NoVLX:       # %bb.0: # %entry
12526; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12527; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12528; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12529; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12530; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12531; NoVLX-NEXT:    kmovw %k0, %eax
12532; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12533; NoVLX-NEXT:    vzeroupper
12534; NoVLX-NEXT:    retq
12535entry:
12536  %0 = bitcast <2 x i64> %__a to <2 x i64>
12537  %1 = bitcast <2 x i64> %__b to <2 x i64>
12538  %2 = icmp sge <2 x i64> %0, %1
12539  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12540  %4 = bitcast <8 x i1> %3 to i8
12541  ret i8 %4
12542}
12543
; Memory-operand variant of the <2 x i64> signed >= compare returning i8: the
; right-hand side is a <2 x i64> load from %__b, and the two result lanes are
; padded with zeroinitializer lanes (shuffle indices 2 and 3) to <8 x i1>.
; With the full feature set the load is expected folded into an xmm vpcmpnltq;
; with avx512f alone the expected code uses a separate vmovdqa, a zmm compare
; and a kshiftlw $14 / kshiftrw $14 pair to keep the two low mask bits.
12544define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12545; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12546; VLX:       # %bb.0: # %entry
12547; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12548; VLX-NEXT:    kmovd %k0, %eax
12549; VLX-NEXT:    # kill: def $al killed $al killed $eax
12550; VLX-NEXT:    retq
12551;
12552; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12553; NoVLX:       # %bb.0: # %entry
12554; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12555; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12556; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12557; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12558; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12559; NoVLX-NEXT:    kmovw %k0, %eax
12560; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12561; NoVLX-NEXT:    vzeroupper
12562; NoVLX-NEXT:    retq
12563entry:
12564  %0 = bitcast <2 x i64> %__a to <2 x i64>
12565  %load = load <2 x i64>, ptr %__b
12566  %1 = bitcast <2 x i64> %load to <2 x i64>
12567  %2 = icmp sge <2 x i64> %0, %1
12568  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12569  %4 = bitcast <8 x i1> %3 to i8
12570  ret i8 %4
12571}
12572
; Masked <2 x i64> signed >= compare with register operands, returning i8.
; Lanes 0 and 1 of %__u (bitcast to <8 x i1>) are ANDed with the compare
; result, which is then padded with zeroinitializer lanes to <8 x i1>.
; Both run lines expect the mask applied as a {%k1} write-mask on vpcmpnltq;
; with avx512f alone the compare is on zmm registers and is followed by a
; kshiftlw $14 / kshiftrw $14 pair that keeps the two low mask bits.
12573define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12574; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12575; VLX:       # %bb.0: # %entry
12576; VLX-NEXT:    kmovd %edi, %k1
12577; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12578; VLX-NEXT:    kmovd %k0, %eax
12579; VLX-NEXT:    # kill: def $al killed $al killed $eax
12580; VLX-NEXT:    retq
12581;
12582; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12583; NoVLX:       # %bb.0: # %entry
12584; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12585; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12586; NoVLX-NEXT:    kmovw %edi, %k1
12587; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12588; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12589; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12590; NoVLX-NEXT:    kmovw %k0, %eax
12591; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12592; NoVLX-NEXT:    vzeroupper
12593; NoVLX-NEXT:    retq
12594entry:
12595  %0 = bitcast <2 x i64> %__a to <2 x i64>
12596  %1 = bitcast <2 x i64> %__b to <2 x i64>
12597  %2 = icmp sge <2 x i64> %0, %1
12598  %3 = bitcast i8 %__u to <8 x i1>
12599  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12600  %4 = and <2 x i1> %2, %extract.i
12601  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12602  %6 = bitcast <8 x i1> %5 to i8
12603  ret i8 %6
12604}
12605
; Masked <2 x i64> signed >= compare returning i8, with the right-hand side
; loaded as <2 x i64> from %__b. Lanes 0 and 1 of %__u (bitcast to <8 x i1>)
; are ANDed with the compare result, which is then padded with zeroinitializer
; lanes to <8 x i1>.
; With the full feature set the load is expected folded as (%rsi) under a
; {%k1} write-mask; with avx512f alone the expected code uses a separate
; vmovdqa, a write-masked zmm compare and a kshiftlw $14 / kshiftrw $14 pair.
12606define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12607; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12608; VLX:       # %bb.0: # %entry
12609; VLX-NEXT:    kmovd %edi, %k1
12610; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12611; VLX-NEXT:    kmovd %k0, %eax
12612; VLX-NEXT:    # kill: def $al killed $al killed $eax
12613; VLX-NEXT:    retq
12614;
12615; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12616; NoVLX:       # %bb.0: # %entry
12617; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12618; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12619; NoVLX-NEXT:    kmovw %edi, %k1
12620; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12621; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12622; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12623; NoVLX-NEXT:    kmovw %k0, %eax
12624; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12625; NoVLX-NEXT:    vzeroupper
12626; NoVLX-NEXT:    retq
12627entry:
12628  %0 = bitcast <2 x i64> %__a to <2 x i64>
12629  %load = load <2 x i64>, ptr %__b
12630  %1 = bitcast <2 x i64> %load to <2 x i64>
12631  %2 = icmp sge <2 x i64> %0, %1
12632  %3 = bitcast i8 %__u to <8 x i1>
12633  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12634  %4 = and <2 x i1> %2, %extract.i
12635  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12636  %6 = bitcast <8 x i1> %5 to i8
12637  ret i8 %6
12638}
12639
12640
; Broadcast variant of the <2 x i64> signed >= compare returning i8: one i64
; is loaded from %__b and splatted to both lanes (insertelement into lane 0,
; then a shufflevector with indices <0, 0>). The two result lanes are padded
; with zeroinitializer lanes to <8 x i1>.
; The expected embedded broadcast is (%rdi){1to2} on xmm with the full feature
; set and (%rdi){1to8} on zmm, followed by a kshiftlw $14 / kshiftrw $14 pair,
; with avx512f alone.
12641define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12642; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12643; VLX:       # %bb.0: # %entry
12644; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12645; VLX-NEXT:    kmovd %k0, %eax
12646; VLX-NEXT:    # kill: def $al killed $al killed $eax
12647; VLX-NEXT:    retq
12648;
12649; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12650; NoVLX:       # %bb.0: # %entry
12651; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12652; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12653; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12654; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12655; NoVLX-NEXT:    kmovw %k0, %eax
12656; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12657; NoVLX-NEXT:    vzeroupper
12658; NoVLX-NEXT:    retq
12659entry:
12660  %0 = bitcast <2 x i64> %__a to <2 x i64>
12661  %load = load i64, ptr %__b
12662  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12663  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12664  %2 = icmp sge <2 x i64> %0, %1
12665  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12666  %4 = bitcast <8 x i1> %3 to i8
12667  ret i8 %4
12668}
12669
; Masked broadcast variant of the <2 x i64> signed >= compare returning i8:
; one i64 is loaded from %__b and splatted to both lanes; lanes 0 and 1 of
; %__u (bitcast to <8 x i1>) are ANDed with the compare result, mask operand
; first. The result is padded with zeroinitializer lanes to <8 x i1>.
; Both run lines expect a {%k1} write-masked vpcmpnltq with an embedded
; broadcast: (%rsi){1to2} on xmm with the full feature set, (%rsi){1to8} on
; zmm followed by a kshiftlw $14 / kshiftrw $14 pair with avx512f alone.
12670define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12671; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12672; VLX:       # %bb.0: # %entry
12673; VLX-NEXT:    kmovd %edi, %k1
12674; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12675; VLX-NEXT:    kmovd %k0, %eax
12676; VLX-NEXT:    # kill: def $al killed $al killed $eax
12677; VLX-NEXT:    retq
12678;
12679; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12680; NoVLX:       # %bb.0: # %entry
12681; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12682; NoVLX-NEXT:    kmovw %edi, %k1
12683; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12684; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12685; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12686; NoVLX-NEXT:    kmovw %k0, %eax
12687; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12688; NoVLX-NEXT:    vzeroupper
12689; NoVLX-NEXT:    retq
12690entry:
12691  %0 = bitcast <2 x i64> %__a to <2 x i64>
12692  %load = load i64, ptr %__b
12693  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12694  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12695  %2 = icmp sge <2 x i64> %0, %1
12696  %3 = bitcast i8 %__u to <8 x i1>
12697  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12698  %4 = and <2 x i1> %extract.i, %2
12699  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12700  %6 = bitcast <8 x i1> %5 to i8
12701  ret i8 %6
12702}
12703
12704
; Signed >= compare of two <2 x i64> register arguments, widened to <16 x i1>
; and returned as an i16. Shuffle indices 0 and 1 pick the compare lanes; the
; repeated indices 2 and 3 all pick lanes of the zeroinitializer operand, so
; bits 2-15 of the result are zero.
; With the full feature set the expected code is an xmm vpcmpnltq plus kmovd;
; with avx512f alone the expected code compares in zmm registers and clears
; all but the two low mask bits with a kshiftlw $14 / kshiftrw $14 pair.
12705define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12706; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12707; VLX:       # %bb.0: # %entry
12708; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12709; VLX-NEXT:    kmovd %k0, %eax
12710; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12711; VLX-NEXT:    retq
12712;
12713; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12714; NoVLX:       # %bb.0: # %entry
12715; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12716; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12717; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12718; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12719; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12720; NoVLX-NEXT:    kmovw %k0, %eax
12721; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12722; NoVLX-NEXT:    vzeroupper
12723; NoVLX-NEXT:    retq
12724entry:
12725  %0 = bitcast <2 x i64> %__a to <2 x i64>
12726  %1 = bitcast <2 x i64> %__b to <2 x i64>
12727  %2 = icmp sge <2 x i64> %0, %1
12728  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12729  %4 = bitcast <16 x i1> %3 to i16
12730  ret i16 %4
12731}
12732
; Memory-operand variant of the <2 x i64> signed >= compare returning i16:
; the right-hand side is a <2 x i64> load from %__b, and the two result lanes
; are padded with zeroinitializer lanes (shuffle indices 2 and 3) to
; <16 x i1>.
; With the full feature set the load is expected folded into an xmm vpcmpnltq;
; with avx512f alone the expected code uses a separate vmovdqa, a zmm compare
; and a kshiftlw $14 / kshiftrw $14 pair to keep the two low mask bits.
12733define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12734; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12735; VLX:       # %bb.0: # %entry
12736; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12737; VLX-NEXT:    kmovd %k0, %eax
12738; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12739; VLX-NEXT:    retq
12740;
12741; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12742; NoVLX:       # %bb.0: # %entry
12743; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12744; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12745; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12746; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12747; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12748; NoVLX-NEXT:    kmovw %k0, %eax
12749; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12750; NoVLX-NEXT:    vzeroupper
12751; NoVLX-NEXT:    retq
12752entry:
12753  %0 = bitcast <2 x i64> %__a to <2 x i64>
12754  %load = load <2 x i64>, ptr %__b
12755  %1 = bitcast <2 x i64> %load to <2 x i64>
12756  %2 = icmp sge <2 x i64> %0, %1
12757  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12758  %4 = bitcast <16 x i1> %3 to i16
12759  ret i16 %4
12760}
12761
; Masked <2 x i64> signed >= compare with register operands, returning i16.
; Lanes 0 and 1 of %__u (bitcast to <8 x i1>) are ANDed with the compare
; result, which is then padded with zeroinitializer lanes to <16 x i1>.
; Both run lines expect the mask applied as a {%k1} write-mask on vpcmpnltq;
; with avx512f alone the compare is on zmm registers and is followed by a
; kshiftlw $14 / kshiftrw $14 pair that keeps the two low mask bits.
12762define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12763; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12764; VLX:       # %bb.0: # %entry
12765; VLX-NEXT:    kmovd %edi, %k1
12766; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12767; VLX-NEXT:    kmovd %k0, %eax
12768; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12769; VLX-NEXT:    retq
12770;
12771; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12772; NoVLX:       # %bb.0: # %entry
12773; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12774; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12775; NoVLX-NEXT:    kmovw %edi, %k1
12776; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12777; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12778; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12779; NoVLX-NEXT:    kmovw %k0, %eax
12780; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12781; NoVLX-NEXT:    vzeroupper
12782; NoVLX-NEXT:    retq
12783entry:
12784  %0 = bitcast <2 x i64> %__a to <2 x i64>
12785  %1 = bitcast <2 x i64> %__b to <2 x i64>
12786  %2 = icmp sge <2 x i64> %0, %1
12787  %3 = bitcast i8 %__u to <8 x i1>
12788  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12789  %4 = and <2 x i1> %2, %extract.i
12790  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12791  %6 = bitcast <16 x i1> %5 to i16
12792  ret i16 %6
12793}
12794
; Masked <2 x i64> signed >= compare returning i16, with the right-hand side
; loaded as <2 x i64> from %__b. Lanes 0 and 1 of %__u (bitcast to <8 x i1>)
; are ANDed with the compare result, which is then padded with zeroinitializer
; lanes to <16 x i1>.
; With the full feature set the load is expected folded as (%rsi) under a
; {%k1} write-mask; with avx512f alone the expected code uses a separate
; vmovdqa, a write-masked zmm compare and a kshiftlw $14 / kshiftrw $14 pair.
12795define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12796; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12797; VLX:       # %bb.0: # %entry
12798; VLX-NEXT:    kmovd %edi, %k1
12799; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12800; VLX-NEXT:    kmovd %k0, %eax
12801; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12802; VLX-NEXT:    retq
12803;
12804; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12805; NoVLX:       # %bb.0: # %entry
12806; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12807; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12808; NoVLX-NEXT:    kmovw %edi, %k1
12809; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12810; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12811; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12812; NoVLX-NEXT:    kmovw %k0, %eax
12813; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12814; NoVLX-NEXT:    vzeroupper
12815; NoVLX-NEXT:    retq
12816entry:
12817  %0 = bitcast <2 x i64> %__a to <2 x i64>
12818  %load = load <2 x i64>, ptr %__b
12819  %1 = bitcast <2 x i64> %load to <2 x i64>
12820  %2 = icmp sge <2 x i64> %0, %1
12821  %3 = bitcast i8 %__u to <8 x i1>
12822  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12823  %4 = and <2 x i1> %2, %extract.i
12824  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12825  %6 = bitcast <16 x i1> %5 to i16
12826  ret i16 %6
12827}
12828
12829
; Broadcast variant of the <2 x i64> signed >= compare returning i16: one i64
; is loaded from %__b and splatted to both lanes (insertelement into lane 0,
; then a shufflevector with indices <0, 0>). The two result lanes are padded
; with zeroinitializer lanes to <16 x i1>.
; The expected embedded broadcast is (%rdi){1to2} on xmm with the full feature
; set and (%rdi){1to8} on zmm, followed by a kshiftlw $14 / kshiftrw $14 pair,
; with avx512f alone.
12830define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12831; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12832; VLX:       # %bb.0: # %entry
12833; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12834; VLX-NEXT:    kmovd %k0, %eax
12835; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12836; VLX-NEXT:    retq
12837;
12838; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12839; NoVLX:       # %bb.0: # %entry
12840; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12841; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12842; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12843; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12844; NoVLX-NEXT:    kmovw %k0, %eax
12845; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12846; NoVLX-NEXT:    vzeroupper
12847; NoVLX-NEXT:    retq
12848entry:
12849  %0 = bitcast <2 x i64> %__a to <2 x i64>
12850  %load = load i64, ptr %__b
12851  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12852  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12853  %2 = icmp sge <2 x i64> %0, %1
12854  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12855  %4 = bitcast <16 x i1> %3 to i16
12856  ret i16 %4
12857}
12858
; Masked broadcast variant of the <2 x i64> signed >= compare returning i16:
; one i64 is loaded from %__b and splatted to both lanes; lanes 0 and 1 of
; %__u (bitcast to <8 x i1>) are ANDed with the compare result, mask operand
; first. The result is padded with zeroinitializer lanes to <16 x i1>.
; Both run lines expect a {%k1} write-masked vpcmpnltq with an embedded
; broadcast: (%rsi){1to2} on xmm with the full feature set, (%rsi){1to8} on
; zmm followed by a kshiftlw $14 / kshiftrw $14 pair with avx512f alone.
12859define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12860; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12861; VLX:       # %bb.0: # %entry
12862; VLX-NEXT:    kmovd %edi, %k1
12863; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12864; VLX-NEXT:    kmovd %k0, %eax
12865; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12866; VLX-NEXT:    retq
12867;
12868; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12869; NoVLX:       # %bb.0: # %entry
12870; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12871; NoVLX-NEXT:    kmovw %edi, %k1
12872; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12873; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12874; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12875; NoVLX-NEXT:    kmovw %k0, %eax
12876; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12877; NoVLX-NEXT:    vzeroupper
12878; NoVLX-NEXT:    retq
12879entry:
12880  %0 = bitcast <2 x i64> %__a to <2 x i64>
12881  %load = load i64, ptr %__b
12882  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12883  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12884  %2 = icmp sge <2 x i64> %0, %1
12885  %3 = bitcast i8 %__u to <8 x i1>
12886  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12887  %4 = and <2 x i1> %extract.i, %2
12888  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12889  %6 = bitcast <16 x i1> %5 to i16
12890  ret i16 %6
12891}
12892
12893
; Signed >= compare of two <2 x i64> register operands. The 2-lane result is
; padded with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected code: a single xmm vpcmpnltq with AVX512VL; with AVX512F only, a zmm
; vpcmpnltq followed by kshiftlw/kshiftrw by 14 to keep the low two mask bits.
12894define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12895; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12896; VLX:       # %bb.0: # %entry
12897; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12898; VLX-NEXT:    kmovd %k0, %eax
12899; VLX-NEXT:    retq
12900;
12901; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12902; NoVLX:       # %bb.0: # %entry
12903; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12904; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12905; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12906; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12907; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12908; NoVLX-NEXT:    kmovw %k0, %eax
12909; NoVLX-NEXT:    vzeroupper
12910; NoVLX-NEXT:    retq
12911entry:
12912  %0 = bitcast <2 x i64> %__a to <2 x i64>
12913  %1 = bitcast <2 x i64> %__b to <2 x i64>
12914  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
12915  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12916  %4 = bitcast <32 x i1> %3 to i32
12917  ret i32 %4
12918}
12919
; Signed >= compare of <2 x i64> %__a against a full <2 x i64> vector loaded
; from %__b. The 2-lane result is padded with zero lanes and returned as i32.
; Expected code: with AVX512VL the load is folded into an xmm vpcmpnltq; with
; AVX512F only, the operand is loaded with vmovdqa, compared in zmm, and a
; kshiftlw/kshiftrw pair by 14 keeps the low two mask bits.
12920define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12921; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12922; VLX:       # %bb.0: # %entry
12923; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12924; VLX-NEXT:    kmovd %k0, %eax
12925; VLX-NEXT:    retq
12926;
12927; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12928; NoVLX:       # %bb.0: # %entry
12929; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12930; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12931; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12932; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12933; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12934; NoVLX-NEXT:    kmovw %k0, %eax
12935; NoVLX-NEXT:    vzeroupper
12936; NoVLX-NEXT:    retq
12937entry:
12938  %0 = bitcast <2 x i64> %__a to <2 x i64>
12939  %load = load <2 x i64>, ptr %__b
12940  %1 = bitcast <2 x i64> %load to <2 x i64>
12941  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
12942  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12943  %4 = bitcast <32 x i1> %3 to i32
12944  ret i32 %4
12945}
12946
; Masked signed >= compare of two <2 x i64> register operands. The 2-lane
; result is ANDed with the low two bits of %__u, padded with zero lanes and
; returned as an i32 bitmask.
; Expected code: %__u is moved into k1 and used as the write mask of
; vpcmpnltq (xmm with AVX512VL; zmm plus kshiftlw/kshiftrw by 14 with
; AVX512F only).
12947define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12948; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12949; VLX:       # %bb.0: # %entry
12950; VLX-NEXT:    kmovd %edi, %k1
12951; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12952; VLX-NEXT:    kmovd %k0, %eax
12953; VLX-NEXT:    retq
12954;
12955; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12956; NoVLX:       # %bb.0: # %entry
12957; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12958; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12959; NoVLX-NEXT:    kmovw %edi, %k1
12960; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12961; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12962; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12963; NoVLX-NEXT:    kmovw %k0, %eax
12964; NoVLX-NEXT:    vzeroupper
12965; NoVLX-NEXT:    retq
12966entry:
12967  %0 = bitcast <2 x i64> %__a to <2 x i64>
12968  %1 = bitcast <2 x i64> %__b to <2 x i64>
12969  %2 = icmp sge <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
12970  %3 = bitcast i8 %__u to <8 x i1>
12971  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12972  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
12973  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12974  %6 = bitcast <32 x i1> %5 to i32
12975  ret i32 %6
12976}
12977
; Masked signed >= compare of <2 x i64> %__a against a full <2 x i64> vector
; loaded from %__b. The 2-lane result is ANDed with the low two bits of %__u,
; padded with zero lanes and returned as an i32 bitmask.
; Expected code: with AVX512VL the load is folded into a masked xmm vpcmpnltq;
; with AVX512F only, a separate vmovdqa load, a masked zmm compare, and
; kshiftlw/kshiftrw by 14.
12978define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12979; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12980; VLX:       # %bb.0: # %entry
12981; VLX-NEXT:    kmovd %edi, %k1
12982; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12983; VLX-NEXT:    kmovd %k0, %eax
12984; VLX-NEXT:    retq
12985;
12986; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12987; NoVLX:       # %bb.0: # %entry
12988; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12989; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12990; NoVLX-NEXT:    kmovw %edi, %k1
12991; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12992; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12993; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12994; NoVLX-NEXT:    kmovw %k0, %eax
12995; NoVLX-NEXT:    vzeroupper
12996; NoVLX-NEXT:    retq
12997entry:
12998  %0 = bitcast <2 x i64> %__a to <2 x i64>
12999  %load = load <2 x i64>, ptr %__b
13000  %1 = bitcast <2 x i64> %load to <2 x i64>
13001  %2 = icmp sge <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
13002  %3 = bitcast i8 %__u to <8 x i1>
13003  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13004  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
13005  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13006  %6 = bitcast <32 x i1> %5 to i32
13007  ret i32 %6
13008}
13009
13010
; Signed >= compare of <2 x i64> %__a against an i64 loaded from %__b and
; splatted to both lanes. The 2-lane result is padded with zero lanes and
; returned as an i32 bitmask.
; Expected code: the splat is folded into vpcmpnltq as an embedded broadcast
; ({1to2} on xmm with AVX512VL; {1to8} on zmm plus kshiftlw/kshiftrw by 14
; with AVX512F only).
13011define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13012; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13013; VLX:       # %bb.0: # %entry
13014; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13015; VLX-NEXT:    kmovd %k0, %eax
13016; VLX-NEXT:    retq
13017;
13018; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13019; NoVLX:       # %bb.0: # %entry
13020; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13021; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13022; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13023; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13024; NoVLX-NEXT:    kmovw %k0, %eax
13025; NoVLX-NEXT:    vzeroupper
13026; NoVLX-NEXT:    retq
13027entry:
13028  %0 = bitcast <2 x i64> %__a to <2 x i64>
13029  %load = load i64, ptr %__b
  ; Splat the loaded scalar: lane 0 is inserted, then the <0, 0> mask copies it to both lanes.
13030  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13031  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13032  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
13033  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13034  %4 = bitcast <32 x i1> %3 to i32
13035  ret i32 %4
13036}
13037
; Masked signed >= compare of <2 x i64> %__a against an i64 loaded from %__b and
; splatted to both lanes. The 2-lane result is ANDed with the low two bits of
; %__u, padded with zero lanes and returned as an i32 bitmask.
; Expected code: a masked vpcmpnltq with an embedded broadcast ({1to2} on xmm
; with AVX512VL; {1to8} on zmm plus kshiftlw/kshiftrw by 14 with AVX512F only).
13038define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13039; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13040; VLX:       # %bb.0: # %entry
13041; VLX-NEXT:    kmovd %edi, %k1
13042; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13043; VLX-NEXT:    kmovd %k0, %eax
13044; VLX-NEXT:    retq
13045;
13046; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13047; NoVLX:       # %bb.0: # %entry
13048; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13049; NoVLX-NEXT:    kmovw %edi, %k1
13050; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13051; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13052; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13053; NoVLX-NEXT:    kmovw %k0, %eax
13054; NoVLX-NEXT:    vzeroupper
13055; NoVLX-NEXT:    retq
13056entry:
13057  %0 = bitcast <2 x i64> %__a to <2 x i64>
13058  %load = load i64, ptr %__b
  ; Splat the loaded scalar: lane 0 is inserted, then the <0, 0> mask copies it to both lanes.
13059  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13060  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13061  %2 = icmp sge <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
13062  %3 = bitcast i8 %__u to <8 x i1>
13063  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13064  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select lanes of the zero vector, so result bits 2..31 are 0.
13065  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13066  %6 = bitcast <32 x i1> %5 to i32
13067  ret i32 %6
13068}
13069
13070
13071define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13072; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13073; VLX:       # %bb.0: # %entry
13074; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
13075; VLX-NEXT:    kmovq %k0, %rax
13076; VLX-NEXT:    retq
13077;
13078; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13079; NoVLX:       # %bb.0: # %entry
13080; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13081; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13082; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13083; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13084; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13085; NoVLX-NEXT:    kmovw %k0, %eax
13086; NoVLX-NEXT:    vzeroupper
13087; NoVLX-NEXT:    retq
13088entry:
13089  %0 = bitcast <2 x i64> %__a to <2 x i64>
13090  %1 = bitcast <2 x i64> %__b to <2 x i64>
13091  %2 = icmp sge <2 x i64> %0, %1
13092  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13093  %4 = bitcast <64 x i1> %3 to i64
13094  ret i64 %4
13095}
13096
13097define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13098; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13099; VLX:       # %bb.0: # %entry
13100; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
13101; VLX-NEXT:    kmovq %k0, %rax
13102; VLX-NEXT:    retq
13103;
13104; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13105; NoVLX:       # %bb.0: # %entry
13106; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13107; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
13108; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13109; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13110; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13111; NoVLX-NEXT:    kmovw %k0, %eax
13112; NoVLX-NEXT:    vzeroupper
13113; NoVLX-NEXT:    retq
13114entry:
13115  %0 = bitcast <2 x i64> %__a to <2 x i64>
13116  %load = load <2 x i64>, ptr %__b
13117  %1 = bitcast <2 x i64> %load to <2 x i64>
13118  %2 = icmp sge <2 x i64> %0, %1
13119  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13120  %4 = bitcast <64 x i1> %3 to i64
13121  ret i64 %4
13122}
13123
13124define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13125; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13126; VLX:       # %bb.0: # %entry
13127; VLX-NEXT:    kmovd %edi, %k1
13128; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13129; VLX-NEXT:    kmovq %k0, %rax
13130; VLX-NEXT:    retq
13131;
13132; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13133; NoVLX:       # %bb.0: # %entry
13134; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13135; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13136; NoVLX-NEXT:    kmovw %edi, %k1
13137; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13138; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13139; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13140; NoVLX-NEXT:    kmovw %k0, %eax
13141; NoVLX-NEXT:    vzeroupper
13142; NoVLX-NEXT:    retq
13143entry:
13144  %0 = bitcast <2 x i64> %__a to <2 x i64>
13145  %1 = bitcast <2 x i64> %__b to <2 x i64>
13146  %2 = icmp sge <2 x i64> %0, %1
13147  %3 = bitcast i8 %__u to <8 x i1>
13148  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13149  %4 = and <2 x i1> %2, %extract.i
13150  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13151  %6 = bitcast <64 x i1> %5 to i64
13152  ret i64 %6
13153}
13154
13155define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13156; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13157; VLX:       # %bb.0: # %entry
13158; VLX-NEXT:    kmovd %edi, %k1
13159; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13160; VLX-NEXT:    kmovq %k0, %rax
13161; VLX-NEXT:    retq
13162;
13163; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13164; NoVLX:       # %bb.0: # %entry
13165; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13166; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
13167; NoVLX-NEXT:    kmovw %edi, %k1
13168; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13169; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13170; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13171; NoVLX-NEXT:    kmovw %k0, %eax
13172; NoVLX-NEXT:    vzeroupper
13173; NoVLX-NEXT:    retq
13174entry:
13175  %0 = bitcast <2 x i64> %__a to <2 x i64>
13176  %load = load <2 x i64>, ptr %__b
13177  %1 = bitcast <2 x i64> %load to <2 x i64>
13178  %2 = icmp sge <2 x i64> %0, %1
13179  %3 = bitcast i8 %__u to <8 x i1>
13180  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13181  %4 = and <2 x i1> %2, %extract.i
13182  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13183  %6 = bitcast <64 x i1> %5 to i64
13184  ret i64 %6
13185}
13186
13187
13188define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13189; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13190; VLX:       # %bb.0: # %entry
13191; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13192; VLX-NEXT:    kmovq %k0, %rax
13193; VLX-NEXT:    retq
13194;
13195; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13196; NoVLX:       # %bb.0: # %entry
13197; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13198; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13199; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13200; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13201; NoVLX-NEXT:    kmovw %k0, %eax
13202; NoVLX-NEXT:    vzeroupper
13203; NoVLX-NEXT:    retq
13204entry:
13205  %0 = bitcast <2 x i64> %__a to <2 x i64>
13206  %load = load i64, ptr %__b
13207  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13208  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13209  %2 = icmp sge <2 x i64> %0, %1
13210  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13211  %4 = bitcast <64 x i1> %3 to i64
13212  ret i64 %4
13213}
13214
13215define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13216; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13217; VLX:       # %bb.0: # %entry
13218; VLX-NEXT:    kmovd %edi, %k1
13219; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13220; VLX-NEXT:    kmovq %k0, %rax
13221; VLX-NEXT:    retq
13222;
13223; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13224; NoVLX:       # %bb.0: # %entry
13225; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13226; NoVLX-NEXT:    kmovw %edi, %k1
13227; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13228; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13229; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13230; NoVLX-NEXT:    kmovw %k0, %eax
13231; NoVLX-NEXT:    vzeroupper
13232; NoVLX-NEXT:    retq
13233entry:
13234  %0 = bitcast <2 x i64> %__a to <2 x i64>
13235  %load = load i64, ptr %__b
13236  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13237  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13238  %2 = icmp sge <2 x i64> %0, %1
13239  %3 = bitcast i8 %__u to <8 x i1>
13240  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13241  %4 = and <2 x i1> %extract.i, %2
13242  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13243  %6 = bitcast <64 x i1> %5 to i64
13244  ret i64 %6
13245}
13246
13247
; Signed >= compare of two <4 x i64> register operands. The 4-lane result is
; padded with zero lanes to <8 x i1> and returned as an i8 bitmask.
; Expected code: a single ymm vpcmpnltq with AVX512VL; with AVX512F only, a zmm
; vpcmpnltq followed by kshiftlw/kshiftrw by 12 to keep the low four mask bits.
13248define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13249; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13250; VLX:       # %bb.0: # %entry
13251; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13252; VLX-NEXT:    kmovd %k0, %eax
13253; VLX-NEXT:    # kill: def $al killed $al killed $eax
13254; VLX-NEXT:    vzeroupper
13255; VLX-NEXT:    retq
13256;
13257; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13258; NoVLX:       # %bb.0: # %entry
13259; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13260; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13261; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13262; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13263; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13264; NoVLX-NEXT:    kmovw %k0, %eax
13265; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13266; NoVLX-NEXT:    vzeroupper
13267; NoVLX-NEXT:    retq
13268entry:
13269  %0 = bitcast <4 x i64> %__a to <4 x i64>
13270  %1 = bitcast <4 x i64> %__b to <4 x i64>
13271  %2 = icmp sge <4 x i64> %0, %1
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13272  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13273  %4 = bitcast <8 x i1> %3 to i8
13274  ret i8 %4
13275}
13276
; Signed >= compare of <4 x i64> %__a against a full <4 x i64> vector loaded
; from %__b. The 4-lane result is padded with zero lanes and returned as i8.
; Expected code: with AVX512VL the load is folded into a ymm vpcmpnltq; with
; AVX512F only, the operand is loaded with vmovdqa, compared in zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps the low four mask bits.
13277define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13278; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13279; VLX:       # %bb.0: # %entry
13280; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13281; VLX-NEXT:    kmovd %k0, %eax
13282; VLX-NEXT:    # kill: def $al killed $al killed $eax
13283; VLX-NEXT:    vzeroupper
13284; VLX-NEXT:    retq
13285;
13286; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13287; NoVLX:       # %bb.0: # %entry
13288; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13289; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13290; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13291; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13292; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13293; NoVLX-NEXT:    kmovw %k0, %eax
13294; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13295; NoVLX-NEXT:    vzeroupper
13296; NoVLX-NEXT:    retq
13297entry:
13298  %0 = bitcast <4 x i64> %__a to <4 x i64>
13299  %load = load <4 x i64>, ptr %__b
13300  %1 = bitcast <4 x i64> %load to <4 x i64>
13301  %2 = icmp sge <4 x i64> %0, %1
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13302  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13303  %4 = bitcast <8 x i1> %3 to i8
13304  ret i8 %4
13305}
13306
; Masked signed >= compare of two <4 x i64> register operands. The 4-lane
; result is ANDed with the low four bits of %__u, padded with zero lanes and
; returned as an i8 bitmask.
; Expected code: %__u is moved into k1 and used as the write mask of
; vpcmpnltq (ymm with AVX512VL; zmm plus kshiftlw/kshiftrw by 12 with
; AVX512F only).
13307define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13308; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13309; VLX:       # %bb.0: # %entry
13310; VLX-NEXT:    kmovd %edi, %k1
13311; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13312; VLX-NEXT:    kmovd %k0, %eax
13313; VLX-NEXT:    # kill: def $al killed $al killed $eax
13314; VLX-NEXT:    vzeroupper
13315; VLX-NEXT:    retq
13316;
13317; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13318; NoVLX:       # %bb.0: # %entry
13319; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13320; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13321; NoVLX-NEXT:    kmovw %edi, %k1
13322; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13323; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13324; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13325; NoVLX-NEXT:    kmovw %k0, %eax
13326; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13327; NoVLX-NEXT:    vzeroupper
13328; NoVLX-NEXT:    retq
13329entry:
13330  %0 = bitcast <4 x i64> %__a to <4 x i64>
13331  %1 = bitcast <4 x i64> %__b to <4 x i64>
13332  %2 = icmp sge <4 x i64> %0, %1
  ; Only bits 0..3 of %__u take part in the mask.
13333  %3 = bitcast i8 %__u to <8 x i1>
13334  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13335  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13336  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13337  %6 = bitcast <8 x i1> %5 to i8
13338  ret i8 %6
13339}
13340
; Masked signed >= compare of <4 x i64> %__a against a full <4 x i64> vector
; loaded from %__b. The 4-lane result is ANDed with the low four bits of %__u,
; padded with zero lanes and returned as an i8 bitmask.
; Expected code: with AVX512VL the load is folded into a masked ymm vpcmpnltq;
; with AVX512F only, a separate vmovdqa load, a masked zmm compare, and
; kshiftlw/kshiftrw by 12.
13341define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13342; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13343; VLX:       # %bb.0: # %entry
13344; VLX-NEXT:    kmovd %edi, %k1
13345; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13346; VLX-NEXT:    kmovd %k0, %eax
13347; VLX-NEXT:    # kill: def $al killed $al killed $eax
13348; VLX-NEXT:    vzeroupper
13349; VLX-NEXT:    retq
13350;
13351; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13352; NoVLX:       # %bb.0: # %entry
13353; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13354; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13355; NoVLX-NEXT:    kmovw %edi, %k1
13356; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13357; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13358; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13359; NoVLX-NEXT:    kmovw %k0, %eax
13360; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13361; NoVLX-NEXT:    vzeroupper
13362; NoVLX-NEXT:    retq
13363entry:
13364  %0 = bitcast <4 x i64> %__a to <4 x i64>
13365  %load = load <4 x i64>, ptr %__b
13366  %1 = bitcast <4 x i64> %load to <4 x i64>
13367  %2 = icmp sge <4 x i64> %0, %1
  ; Only bits 0..3 of %__u take part in the mask.
13368  %3 = bitcast i8 %__u to <8 x i1>
13369  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13370  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13371  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13372  %6 = bitcast <8 x i1> %5 to i8
13373  ret i8 %6
13374}
13375
13376
; Signed >= compare of <4 x i64> %__a against an i64 loaded from %__b and
; splatted to all four lanes. The 4-lane result is padded with zero lanes and
; returned as an i8 bitmask.
; Expected code: the splat is folded into vpcmpnltq as an embedded broadcast
; ({1to4} on ymm with AVX512VL; {1to8} on zmm plus kshiftlw/kshiftrw by 12
; with AVX512F only).
13377define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13378; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13379; VLX:       # %bb.0: # %entry
13380; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13381; VLX-NEXT:    kmovd %k0, %eax
13382; VLX-NEXT:    # kill: def $al killed $al killed $eax
13383; VLX-NEXT:    vzeroupper
13384; VLX-NEXT:    retq
13385;
13386; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13387; NoVLX:       # %bb.0: # %entry
13388; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13389; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13390; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13391; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13392; NoVLX-NEXT:    kmovw %k0, %eax
13393; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13394; NoVLX-NEXT:    vzeroupper
13395; NoVLX-NEXT:    retq
13396entry:
13397  %0 = bitcast <4 x i64> %__a to <4 x i64>
13398  %load = load i64, ptr %__b
  ; Splat the loaded scalar: lane 0 is inserted, then the all-zero mask copies it to every lane.
13399  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13400  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13401  %2 = icmp sge <4 x i64> %0, %1
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13402  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13403  %4 = bitcast <8 x i1> %3 to i8
13404  ret i8 %4
13405}
13406
; Masked signed >= compare of <4 x i64> %__a against an i64 loaded from %__b and
; splatted to all four lanes. The 4-lane result is ANDed with the low four bits
; of %__u, padded with zero lanes and returned as an i8 bitmask.
; Expected code: a masked vpcmpnltq with an embedded broadcast ({1to4} on ymm
; with AVX512VL; {1to8} on zmm plus kshiftlw/kshiftrw by 12 with AVX512F only).
13407define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13408; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13409; VLX:       # %bb.0: # %entry
13410; VLX-NEXT:    kmovd %edi, %k1
13411; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13412; VLX-NEXT:    kmovd %k0, %eax
13413; VLX-NEXT:    # kill: def $al killed $al killed $eax
13414; VLX-NEXT:    vzeroupper
13415; VLX-NEXT:    retq
13416;
13417; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13418; NoVLX:       # %bb.0: # %entry
13419; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13420; NoVLX-NEXT:    kmovw %edi, %k1
13421; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13422; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13423; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13424; NoVLX-NEXT:    kmovw %k0, %eax
13425; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13426; NoVLX-NEXT:    vzeroupper
13427; NoVLX-NEXT:    retq
13428entry:
13429  %0 = bitcast <4 x i64> %__a to <4 x i64>
13430  %load = load i64, ptr %__b
  ; Splat the loaded scalar: lane 0 is inserted, then the all-zero mask copies it to every lane.
13431  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13432  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13433  %2 = icmp sge <4 x i64> %0, %1
  ; Only bits 0..3 of %__u take part in the mask.
13434  %3 = bitcast i8 %__u to <8 x i1>
13435  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13436  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..7 are 0.
13437  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13438  %6 = bitcast <8 x i1> %5 to i8
13439  ret i8 %6
13440}
13441
13442
; Signed >= compare of two <4 x i64> register operands. The 4-lane result is
; padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected code: a single ymm vpcmpnltq with AVX512VL; with AVX512F only, a zmm
; vpcmpnltq followed by kshiftlw/kshiftrw by 12 to keep the low four mask bits.
13443define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13444; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13445; VLX:       # %bb.0: # %entry
13446; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13447; VLX-NEXT:    kmovd %k0, %eax
13448; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13449; VLX-NEXT:    vzeroupper
13450; VLX-NEXT:    retq
13451;
13452; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13453; NoVLX:       # %bb.0: # %entry
13454; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13455; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13456; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13457; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13458; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13459; NoVLX-NEXT:    kmovw %k0, %eax
13460; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13461; NoVLX-NEXT:    vzeroupper
13462; NoVLX-NEXT:    retq
13463entry:
13464  %0 = bitcast <4 x i64> %__a to <4 x i64>
13465  %1 = bitcast <4 x i64> %__b to <4 x i64>
13466  %2 = icmp sge <4 x i64> %0, %1
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..15 are 0.
13467  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13468  %4 = bitcast <16 x i1> %3 to i16
13469  ret i16 %4
13470}
13471
; Signed >= compare of <4 x i64> %__a against a full <4 x i64> vector loaded
; from %__b. The 4-lane result is padded with zero lanes to <16 x i1> and
; returned as an i16 bitmask.
; Expected code: with AVX512VL the load is folded into a ymm vpcmpnltq; with
; AVX512F only, the operand is loaded with vmovdqa, compared in zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps the low four mask bits.
13472define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13473; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13474; VLX:       # %bb.0: # %entry
13475; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13476; VLX-NEXT:    kmovd %k0, %eax
13477; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13478; VLX-NEXT:    vzeroupper
13479; VLX-NEXT:    retq
13480;
13481; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13482; NoVLX:       # %bb.0: # %entry
13483; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13484; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13485; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13486; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13487; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13488; NoVLX-NEXT:    kmovw %k0, %eax
13489; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13490; NoVLX-NEXT:    vzeroupper
13491; NoVLX-NEXT:    retq
13492entry:
13493  %0 = bitcast <4 x i64> %__a to <4 x i64>
13494  %load = load <4 x i64>, ptr %__b
13495  %1 = bitcast <4 x i64> %load to <4 x i64>
13496  %2 = icmp sge <4 x i64> %0, %1
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..15 are 0.
13497  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13498  %4 = bitcast <16 x i1> %3 to i16
13499  ret i16 %4
13500}
13501
; Masked signed >= compare of two <4 x i64> register operands. The 4-lane
; result is ANDed with the low four bits of %__u, padded with zero lanes to
; <16 x i1> and returned as an i16 bitmask.
; Expected code: %__u is moved into k1 and used as the write mask of
; vpcmpnltq (ymm with AVX512VL; zmm plus kshiftlw/kshiftrw by 12 with
; AVX512F only).
13502define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13503; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13504; VLX:       # %bb.0: # %entry
13505; VLX-NEXT:    kmovd %edi, %k1
13506; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13507; VLX-NEXT:    kmovd %k0, %eax
13508; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13509; VLX-NEXT:    vzeroupper
13510; VLX-NEXT:    retq
13511;
13512; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13513; NoVLX:       # %bb.0: # %entry
13514; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13515; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13516; NoVLX-NEXT:    kmovw %edi, %k1
13517; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13518; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13519; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13520; NoVLX-NEXT:    kmovw %k0, %eax
13521; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13522; NoVLX-NEXT:    vzeroupper
13523; NoVLX-NEXT:    retq
13524entry:
13525  %0 = bitcast <4 x i64> %__a to <4 x i64>
13526  %1 = bitcast <4 x i64> %__b to <4 x i64>
13527  %2 = icmp sge <4 x i64> %0, %1
  ; Only bits 0..3 of %__u take part in the mask.
13528  %3 = bitcast i8 %__u to <8 x i1>
13529  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13530  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4..7 select lanes of the zero vector, so result bits 4..15 are 0.
13531  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13532  %6 = bitcast <16 x i1> %5 to i16
13533  ret i16 %6
13534}
13535
; Same masked signed >= compare as the register form, but the right-hand
; <4 x i64> operand is loaded from %__b. The result is ANDed with elements 0-3
; of %__u, zero-padded to 16 lanes and returned as i16. With AVX512VL the load
; is expected to fold into vpcmpnltq; in the AVX512F-only run it is expected as
; a separate vmovdqa before a zmm compare and a kshiftlw/kshiftrw $12 pair.
13536define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13537; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13538; VLX:       # %bb.0: # %entry
13539; VLX-NEXT:    kmovd %edi, %k1
13540; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13541; VLX-NEXT:    kmovd %k0, %eax
13542; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13543; VLX-NEXT:    vzeroupper
13544; VLX-NEXT:    retq
13545;
13546; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13547; NoVLX:       # %bb.0: # %entry
13548; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13549; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13550; NoVLX-NEXT:    kmovw %edi, %k1
13551; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13552; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13553; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13554; NoVLX-NEXT:    kmovw %k0, %eax
13555; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13556; NoVLX-NEXT:    vzeroupper
13557; NoVLX-NEXT:    retq
13558entry:
13559  %0 = bitcast <4 x i64> %__a to <4 x i64>
13560  %load = load <4 x i64>, ptr %__b
13561  %1 = bitcast <4 x i64> %load to <4 x i64>
13562  %2 = icmp sge <4 x i64> %0, %1
13563  %3 = bitcast i8 %__u to <8 x i1>
13564  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13565  %4 = and <4 x i1> %2, %extract.i
13566  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13567  %6 = bitcast <16 x i1> %5 to i16
13568  ret i16 %6
13569}
13570
13571
; Unmasked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes (insertelement + all-zero shuffle mask).
; The 4-lane result is zero-padded to 16 lanes and returned as i16. The expected
; code uses an embedded-broadcast memory operand: {1to4} on ymm with AVX512VL,
; {1to8} on zmm plus a kshiftlw/kshiftrw $12 pair in the AVX512F-only run.
13572define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13573; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13574; VLX:       # %bb.0: # %entry
13575; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13576; VLX-NEXT:    kmovd %k0, %eax
13577; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13578; VLX-NEXT:    vzeroupper
13579; VLX-NEXT:    retq
13580;
13581; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13582; NoVLX:       # %bb.0: # %entry
13583; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13584; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13585; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13586; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13587; NoVLX-NEXT:    kmovw %k0, %eax
13588; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13589; NoVLX-NEXT:    vzeroupper
13590; NoVLX-NEXT:    retq
13591entry:
13592  %0 = bitcast <4 x i64> %__a to <4 x i64>
13593  %load = load i64, ptr %__b
13594  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13595  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13596  %2 = icmp sge <4 x i64> %0, %1
13597  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13598  %4 = bitcast <16 x i1> %3 to i16
13599  ret i16 %4
13600}
13601
; Masked broadcast form: signed >= compare of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with elements 0-3 of %__u
; (note the AND operands are in the opposite order from the non-broadcast
; masked tests). The result is zero-padded to 16 lanes and returned as i16.
; Expected: a write-masked vpcmpnltq with a {1to4} operand under AVX512VL, or a
; {1to8} zmm compare plus kshiftlw/kshiftrw $12 in the AVX512F-only run.
13602define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13603; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13604; VLX:       # %bb.0: # %entry
13605; VLX-NEXT:    kmovd %edi, %k1
13606; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13607; VLX-NEXT:    kmovd %k0, %eax
13608; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13609; VLX-NEXT:    vzeroupper
13610; VLX-NEXT:    retq
13611;
13612; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13613; NoVLX:       # %bb.0: # %entry
13614; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13615; NoVLX-NEXT:    kmovw %edi, %k1
13616; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13617; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13618; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13619; NoVLX-NEXT:    kmovw %k0, %eax
13620; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13621; NoVLX-NEXT:    vzeroupper
13622; NoVLX-NEXT:    retq
13623entry:
13624  %0 = bitcast <4 x i64> %__a to <4 x i64>
13625  %load = load i64, ptr %__b
13626  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13627  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13628  %2 = icmp sge <4 x i64> %0, %1
13629  %3 = bitcast i8 %__u to <8 x i1>
13630  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13631  %4 = and <4 x i1> %extract.i, %2
13632  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13633  %6 = bitcast <16 x i1> %5 to i16
13634  ret i16 %6
13635}
13636
13637
; Unmasked signed >= compare of two <4 x i64> register operands. The 4-lane
; result is padded to 32 lanes with zeros (shuffle indices 4-7 select from the
; zeroinitializer operand) and returned as i32. Expected: a single ymm
; vpcmpnltq with AVX512VL; a zmm compare plus kshiftlw/kshiftrw $12 in the
; AVX512F-only run.
13638define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13639; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13640; VLX:       # %bb.0: # %entry
13641; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13642; VLX-NEXT:    kmovd %k0, %eax
13643; VLX-NEXT:    vzeroupper
13644; VLX-NEXT:    retq
13645;
13646; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13647; NoVLX:       # %bb.0: # %entry
13648; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13649; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13650; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13651; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13652; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13653; NoVLX-NEXT:    kmovw %k0, %eax
13654; NoVLX-NEXT:    vzeroupper
13655; NoVLX-NEXT:    retq
13656entry:
13657  %0 = bitcast <4 x i64> %__a to <4 x i64>
13658  %1 = bitcast <4 x i64> %__b to <4 x i64>
13659  %2 = icmp sge <4 x i64> %0, %1
13660  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13661  %4 = bitcast <32 x i1> %3 to i32
13662  ret i32 %4
13663}
13664
; Unmasked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b; the 4-lane result is zero-padded to 32 lanes and returned as i32.
; With AVX512VL the load is expected to fold into vpcmpnltq; in the
; AVX512F-only run a separate vmovdqa, a zmm compare and a kshiftlw/kshiftrw
; $12 pair are expected.
13665define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13666; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13667; VLX:       # %bb.0: # %entry
13668; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13669; VLX-NEXT:    kmovd %k0, %eax
13670; VLX-NEXT:    vzeroupper
13671; VLX-NEXT:    retq
13672;
13673; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13674; NoVLX:       # %bb.0: # %entry
13675; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13676; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13677; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13678; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13679; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13680; NoVLX-NEXT:    kmovw %k0, %eax
13681; NoVLX-NEXT:    vzeroupper
13682; NoVLX-NEXT:    retq
13683entry:
13684  %0 = bitcast <4 x i64> %__a to <4 x i64>
13685  %load = load <4 x i64>, ptr %__b
13686  %1 = bitcast <4 x i64> %load to <4 x i64>
13687  %2 = icmp sge <4 x i64> %0, %1
13688  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13689  %4 = bitcast <32 x i1> %3 to i32
13690  ret i32 %4
13691}
13692
; Signed >= compare of two <4 x i64> register operands, ANDed with elements 0-3
; of %__u viewed as <8 x i1>; the result is zero-padded to 32 lanes and
; returned as i32. Expected: one write-masked ymm vpcmpnltq with AVX512VL; a
; write-masked zmm compare plus kshiftlw/kshiftrw $12 in the AVX512F-only run.
13693define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13694; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13695; VLX:       # %bb.0: # %entry
13696; VLX-NEXT:    kmovd %edi, %k1
13697; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13698; VLX-NEXT:    kmovd %k0, %eax
13699; VLX-NEXT:    vzeroupper
13700; VLX-NEXT:    retq
13701;
13702; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13703; NoVLX:       # %bb.0: # %entry
13704; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13705; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13706; NoVLX-NEXT:    kmovw %edi, %k1
13707; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13708; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13709; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13710; NoVLX-NEXT:    kmovw %k0, %eax
13711; NoVLX-NEXT:    vzeroupper
13712; NoVLX-NEXT:    retq
13713entry:
13714  %0 = bitcast <4 x i64> %__a to <4 x i64>
13715  %1 = bitcast <4 x i64> %__b to <4 x i64>
13716  %2 = icmp sge <4 x i64> %0, %1
13717  %3 = bitcast i8 %__u to <8 x i1>
13718  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13719  %4 = and <4 x i1> %2, %extract.i
13720  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13721  %6 = bitcast <32 x i1> %5 to i32
13722  ret i32 %6
13723}
13724
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b, ANDed with elements 0-3 of %__u; the result is zero-padded to 32 lanes
; and returned as i32. With AVX512VL the load is expected to fold into the
; write-masked vpcmpnltq; the AVX512F-only run expects a separate vmovdqa, a
; zmm compare and a kshiftlw/kshiftrw $12 pair.
13725define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13726; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13727; VLX:       # %bb.0: # %entry
13728; VLX-NEXT:    kmovd %edi, %k1
13729; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13730; VLX-NEXT:    kmovd %k0, %eax
13731; VLX-NEXT:    vzeroupper
13732; VLX-NEXT:    retq
13733;
13734; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13735; NoVLX:       # %bb.0: # %entry
13736; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13737; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13738; NoVLX-NEXT:    kmovw %edi, %k1
13739; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13740; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13741; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13742; NoVLX-NEXT:    kmovw %k0, %eax
13743; NoVLX-NEXT:    vzeroupper
13744; NoVLX-NEXT:    retq
13745entry:
13746  %0 = bitcast <4 x i64> %__a to <4 x i64>
13747  %load = load <4 x i64>, ptr %__b
13748  %1 = bitcast <4 x i64> %load to <4 x i64>
13749  %2 = icmp sge <4 x i64> %0, %1
13750  %3 = bitcast i8 %__u to <8 x i1>
13751  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13752  %4 = and <4 x i1> %2, %extract.i
13753  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13754  %6 = bitcast <32 x i1> %5 to i32
13755  ret i32 %6
13756}
13757
13758
; Unmasked signed >= compare of <4 x i64> %__a against an i64 loaded from %__b
; and splatted to four lanes (insertelement + all-zero shuffle mask). The
; 4-lane result is zero-padded to 32 lanes and returned as i32. Expected: a
; {1to4} broadcast operand on ymm with AVX512VL; a {1to8} zmm compare plus
; kshiftlw/kshiftrw $12 in the AVX512F-only run.
13759define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13760; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13761; VLX:       # %bb.0: # %entry
13762; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13763; VLX-NEXT:    kmovd %k0, %eax
13764; VLX-NEXT:    vzeroupper
13765; VLX-NEXT:    retq
13766;
13767; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13768; NoVLX:       # %bb.0: # %entry
13769; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13770; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13771; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13772; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13773; NoVLX-NEXT:    kmovw %k0, %eax
13774; NoVLX-NEXT:    vzeroupper
13775; NoVLX-NEXT:    retq
13776entry:
13777  %0 = bitcast <4 x i64> %__a to <4 x i64>
13778  %load = load i64, ptr %__b
13779  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13780  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13781  %2 = icmp sge <4 x i64> %0, %1
13782  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13783  %4 = bitcast <32 x i1> %3 to i32
13784  ret i32 %4
13785}
13786
; Masked broadcast form: signed >= compare of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with elements 0-3 of %__u
; (mask is the first AND operand here). The result is zero-padded to 32 lanes
; and returned as i32. Expected: a write-masked {1to4} ymm compare with
; AVX512VL; a {1to8} zmm compare plus kshiftlw/kshiftrw $12 otherwise.
13787define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13788; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13789; VLX:       # %bb.0: # %entry
13790; VLX-NEXT:    kmovd %edi, %k1
13791; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13792; VLX-NEXT:    kmovd %k0, %eax
13793; VLX-NEXT:    vzeroupper
13794; VLX-NEXT:    retq
13795;
13796; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13797; NoVLX:       # %bb.0: # %entry
13798; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13799; NoVLX-NEXT:    kmovw %edi, %k1
13800; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13801; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13802; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13803; NoVLX-NEXT:    kmovw %k0, %eax
13804; NoVLX-NEXT:    vzeroupper
13805; NoVLX-NEXT:    retq
13806entry:
13807  %0 = bitcast <4 x i64> %__a to <4 x i64>
13808  %load = load i64, ptr %__b
13809  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13810  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13811  %2 = icmp sge <4 x i64> %0, %1
13812  %3 = bitcast i8 %__u to <8 x i1>
13813  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13814  %4 = and <4 x i1> %extract.i, %2
13815  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13816  %6 = bitcast <32 x i1> %5 to i32
13817  ret i32 %6
13818}
13819
13820
; Unmasked signed >= compare of two <4 x i64> register operands. The 4-lane
; result is padded to 64 lanes with zeros (shuffle indices 4-7 select from the
; zeroinitializer operand) and returned as i64. Expected: ymm vpcmpnltq and a
; kmovq to %rax with AVX512VL; in the AVX512F-only run a zmm compare, a
; kshiftlw/kshiftrw $12 pair and a kmovw into %eax.
13821define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13822; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13823; VLX:       # %bb.0: # %entry
13824; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13825; VLX-NEXT:    kmovq %k0, %rax
13826; VLX-NEXT:    vzeroupper
13827; VLX-NEXT:    retq
13828;
13829; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13830; NoVLX:       # %bb.0: # %entry
13831; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13832; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13833; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13834; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13835; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13836; NoVLX-NEXT:    kmovw %k0, %eax
13837; NoVLX-NEXT:    vzeroupper
13838; NoVLX-NEXT:    retq
13839entry:
13840  %0 = bitcast <4 x i64> %__a to <4 x i64>
13841  %1 = bitcast <4 x i64> %__b to <4 x i64>
13842  %2 = icmp sge <4 x i64> %0, %1
13843  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13844  %4 = bitcast <64 x i1> %3 to i64
13845  ret i64 %4
13846}
13847
; Unmasked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b; the 4-lane result is zero-padded to 64 lanes and returned as i64.
; With AVX512VL the load is expected to fold into vpcmpnltq, followed by kmovq;
; the AVX512F-only run expects a separate vmovdqa, a zmm compare, a
; kshiftlw/kshiftrw $12 pair and a kmovw into %eax.
13848define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13849; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13850; VLX:       # %bb.0: # %entry
13851; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13852; VLX-NEXT:    kmovq %k0, %rax
13853; VLX-NEXT:    vzeroupper
13854; VLX-NEXT:    retq
13855;
13856; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13857; NoVLX:       # %bb.0: # %entry
13858; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13859; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13860; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13861; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13862; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13863; NoVLX-NEXT:    kmovw %k0, %eax
13864; NoVLX-NEXT:    vzeroupper
13865; NoVLX-NEXT:    retq
13866entry:
13867  %0 = bitcast <4 x i64> %__a to <4 x i64>
13868  %load = load <4 x i64>, ptr %__b
13869  %1 = bitcast <4 x i64> %load to <4 x i64>
13870  %2 = icmp sge <4 x i64> %0, %1
13871  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13872  %4 = bitcast <64 x i1> %3 to i64
13873  ret i64 %4
13874}
13875
; Signed >= compare of two <4 x i64> register operands, ANDed with elements 0-3
; of %__u viewed as <8 x i1>; the result is zero-padded to 64 lanes and
; returned as i64. Expected: a write-masked ymm vpcmpnltq and kmovq with
; AVX512VL; a write-masked zmm compare, kshiftlw/kshiftrw $12 and kmovw into
; %eax in the AVX512F-only run.
13876define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13877; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13878; VLX:       # %bb.0: # %entry
13879; VLX-NEXT:    kmovd %edi, %k1
13880; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13881; VLX-NEXT:    kmovq %k0, %rax
13882; VLX-NEXT:    vzeroupper
13883; VLX-NEXT:    retq
13884;
13885; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13886; NoVLX:       # %bb.0: # %entry
13887; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13888; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13889; NoVLX-NEXT:    kmovw %edi, %k1
13890; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13891; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13892; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13893; NoVLX-NEXT:    kmovw %k0, %eax
13894; NoVLX-NEXT:    vzeroupper
13895; NoVLX-NEXT:    retq
13896entry:
13897  %0 = bitcast <4 x i64> %__a to <4 x i64>
13898  %1 = bitcast <4 x i64> %__b to <4 x i64>
13899  %2 = icmp sge <4 x i64> %0, %1
13900  %3 = bitcast i8 %__u to <8 x i1>
13901  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13902  %4 = and <4 x i1> %2, %extract.i
13903  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13904  %6 = bitcast <64 x i1> %5 to i64
13905  ret i64 %6
13906}
13907
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b, ANDed with elements 0-3 of %__u; the result is zero-padded to 64 lanes
; and returned as i64. With AVX512VL the load is expected to fold into the
; write-masked vpcmpnltq; the AVX512F-only run expects a separate vmovdqa, a
; zmm compare, kshiftlw/kshiftrw $12 and kmovw into %eax.
13908define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13909; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13910; VLX:       # %bb.0: # %entry
13911; VLX-NEXT:    kmovd %edi, %k1
13912; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13913; VLX-NEXT:    kmovq %k0, %rax
13914; VLX-NEXT:    vzeroupper
13915; VLX-NEXT:    retq
13916;
13917; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13918; NoVLX:       # %bb.0: # %entry
13919; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13920; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13921; NoVLX-NEXT:    kmovw %edi, %k1
13922; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13923; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13924; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13925; NoVLX-NEXT:    kmovw %k0, %eax
13926; NoVLX-NEXT:    vzeroupper
13927; NoVLX-NEXT:    retq
13928entry:
13929  %0 = bitcast <4 x i64> %__a to <4 x i64>
13930  %load = load <4 x i64>, ptr %__b
13931  %1 = bitcast <4 x i64> %load to <4 x i64>
13932  %2 = icmp sge <4 x i64> %0, %1
13933  %3 = bitcast i8 %__u to <8 x i1>
13934  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13935  %4 = and <4 x i1> %2, %extract.i
13936  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13937  %6 = bitcast <64 x i1> %5 to i64
13938  ret i64 %6
13939}
13940
13941
; Unmasked signed >= compare of <4 x i64> %__a against an i64 loaded from %__b
; and splatted to four lanes (insertelement + all-zero shuffle mask). The
; 4-lane result is zero-padded to 64 lanes and returned as i64. Expected: a
; {1to4} broadcast operand on ymm and kmovq with AVX512VL; a {1to8} zmm
; compare, kshiftlw/kshiftrw $12 and kmovw into %eax otherwise.
13942define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13943; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13944; VLX:       # %bb.0: # %entry
13945; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13946; VLX-NEXT:    kmovq %k0, %rax
13947; VLX-NEXT:    vzeroupper
13948; VLX-NEXT:    retq
13949;
13950; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13951; NoVLX:       # %bb.0: # %entry
13952; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13953; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13954; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13955; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13956; NoVLX-NEXT:    kmovw %k0, %eax
13957; NoVLX-NEXT:    vzeroupper
13958; NoVLX-NEXT:    retq
13959entry:
13960  %0 = bitcast <4 x i64> %__a to <4 x i64>
13961  %load = load i64, ptr %__b
13962  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13963  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13964  %2 = icmp sge <4 x i64> %0, %1
13965  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13966  %4 = bitcast <64 x i1> %3 to i64
13967  ret i64 %4
13968}
13969
; Masked broadcast form: signed >= compare of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with elements 0-3 of %__u
; (mask is the first AND operand here). The result is zero-padded to 64 lanes
; and returned as i64. Expected: a write-masked {1to4} ymm compare and kmovq
; with AVX512VL; a {1to8} zmm compare, kshiftlw/kshiftrw $12 and kmovw into
; %eax otherwise.
13970define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13971; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13972; VLX:       # %bb.0: # %entry
13973; VLX-NEXT:    kmovd %edi, %k1
13974; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13975; VLX-NEXT:    kmovq %k0, %rax
13976; VLX-NEXT:    vzeroupper
13977; VLX-NEXT:    retq
13978;
13979; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13980; NoVLX:       # %bb.0: # %entry
13981; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13982; NoVLX-NEXT:    kmovw %edi, %k1
13983; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13984; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13985; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13986; NoVLX-NEXT:    kmovw %k0, %eax
13987; NoVLX-NEXT:    vzeroupper
13988; NoVLX-NEXT:    retq
13989entry:
13990  %0 = bitcast <4 x i64> %__a to <4 x i64>
13991  %load = load i64, ptr %__b
13992  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13993  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13994  %2 = icmp sge <4 x i64> %0, %1
13995  %3 = bitcast i8 %__u to <8 x i1>
13996  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13997  %4 = and <4 x i1> %extract.i, %2
13998  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13999  %6 = bitcast <64 x i1> %5 to i64
14000  ret i64 %6
14001}
14002
14003
; Unmasked signed >= compare of two <8 x i64> register operands. The 8-lane
; result is padded to 16 lanes with zeros (shuffle indices 8-15 select from the
; zeroinitializer operand) and returned as i16. Both run configurations expect
; a single zmm vpcmpnltq with no mask shifts; they differ only in kmovd vs
; kmovw for the mask-to-GPR move.
14004define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14005; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14006; VLX:       # %bb.0: # %entry
14007; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14008; VLX-NEXT:    kmovd %k0, %eax
14009; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14010; VLX-NEXT:    vzeroupper
14011; VLX-NEXT:    retq
14012;
14013; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14014; NoVLX:       # %bb.0: # %entry
14015; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14016; NoVLX-NEXT:    kmovw %k0, %eax
14017; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14018; NoVLX-NEXT:    vzeroupper
14019; NoVLX-NEXT:    retq
14020entry:
14021  %0 = bitcast <8 x i64> %__a to <8 x i64>
14022  %1 = bitcast <8 x i64> %__b to <8 x i64>
14023  %2 = icmp sge <8 x i64> %0, %1
14024  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14025  %4 = bitcast <16 x i1> %3 to i16
14026  ret i16 %4
14027}
14028
; Unmasked signed >= compare of <8 x i64> %__a against an <8 x i64> loaded from
; %__b; the 8-lane result is zero-padded to 16 lanes and returned as i16. Both
; run configurations expect the load folded into a zmm vpcmpnltq memory
; operand.
14029define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14030; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14031; VLX:       # %bb.0: # %entry
14032; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14033; VLX-NEXT:    kmovd %k0, %eax
14034; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14035; VLX-NEXT:    vzeroupper
14036; VLX-NEXT:    retq
14037;
14038; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14039; NoVLX:       # %bb.0: # %entry
14040; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14041; NoVLX-NEXT:    kmovw %k0, %eax
14042; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14043; NoVLX-NEXT:    vzeroupper
14044; NoVLX-NEXT:    retq
14045entry:
14046  %0 = bitcast <8 x i64> %__a to <8 x i64>
14047  %load = load <8 x i64>, ptr %__b
14048  %1 = bitcast <8 x i64> %load to <8 x i64>
14049  %2 = icmp sge <8 x i64> %0, %1
14050  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14051  %4 = bitcast <16 x i1> %3 to i16
14052  ret i16 %4
14053}
14054
; Signed >= compare of two <8 x i64> register operands, ANDed lane-wise with
; all eight bits of %__u viewed as <8 x i1> (no sub-vector extract is needed at
; this width). The result is zero-padded to 16 lanes and returned as i16. Both
; run configurations expect a write-masked zmm vpcmpnltq.
14055define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14056; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14057; VLX:       # %bb.0: # %entry
14058; VLX-NEXT:    kmovd %edi, %k1
14059; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14060; VLX-NEXT:    kmovd %k0, %eax
14061; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14062; VLX-NEXT:    vzeroupper
14063; VLX-NEXT:    retq
14064;
14065; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14066; NoVLX:       # %bb.0: # %entry
14067; NoVLX-NEXT:    kmovw %edi, %k1
14068; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14069; NoVLX-NEXT:    kmovw %k0, %eax
14070; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14071; NoVLX-NEXT:    vzeroupper
14072; NoVLX-NEXT:    retq
14073entry:
14074  %0 = bitcast <8 x i64> %__a to <8 x i64>
14075  %1 = bitcast <8 x i64> %__b to <8 x i64>
14076  %2 = icmp sge <8 x i64> %0, %1
14077  %3 = bitcast i8 %__u to <8 x i1>
14078  %4 = and <8 x i1> %2, %3
14079  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14080  %6 = bitcast <16 x i1> %5 to i16
14081  ret i16 %6
14082}
14083
; Masked signed >= compare of <8 x i64> %__a against an <8 x i64> loaded from
; %__b, ANDed with %__u viewed as <8 x i1>; the result is zero-padded to 16
; lanes and returned as i16. Both run configurations expect the load folded
; into a write-masked zmm vpcmpnltq.
14084define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14085; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14086; VLX:       # %bb.0: # %entry
14087; VLX-NEXT:    kmovd %edi, %k1
14088; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14089; VLX-NEXT:    kmovd %k0, %eax
14090; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14091; VLX-NEXT:    vzeroupper
14092; VLX-NEXT:    retq
14093;
14094; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14095; NoVLX:       # %bb.0: # %entry
14096; NoVLX-NEXT:    kmovw %edi, %k1
14097; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14098; NoVLX-NEXT:    kmovw %k0, %eax
14099; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14100; NoVLX-NEXT:    vzeroupper
14101; NoVLX-NEXT:    retq
14102entry:
14103  %0 = bitcast <8 x i64> %__a to <8 x i64>
14104  %load = load <8 x i64>, ptr %__b
14105  %1 = bitcast <8 x i64> %load to <8 x i64>
14106  %2 = icmp sge <8 x i64> %0, %1
14107  %3 = bitcast i8 %__u to <8 x i1>
14108  %4 = and <8 x i1> %2, %3
14109  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14110  %6 = bitcast <16 x i1> %5 to i16
14111  ret i16 %6
14112}
14113
14114
; Unmasked signed >= compare of <8 x i64> %__a against an i64 loaded from %__b
; and splatted to all eight lanes (insertelement + all-zero shuffle mask). The
; 8-lane result is zero-padded to 16 lanes and returned as i16. Both run
; configurations expect a zmm vpcmpnltq with a {1to8} broadcast memory operand.
14115define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14116; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14117; VLX:       # %bb.0: # %entry
14118; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14119; VLX-NEXT:    kmovd %k0, %eax
14120; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14121; VLX-NEXT:    vzeroupper
14122; VLX-NEXT:    retq
14123;
14124; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14125; NoVLX:       # %bb.0: # %entry
14126; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14127; NoVLX-NEXT:    kmovw %k0, %eax
14128; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14129; NoVLX-NEXT:    vzeroupper
14130; NoVLX-NEXT:    retq
14131entry:
14132  %0 = bitcast <8 x i64> %__a to <8 x i64>
14133  %load = load i64, ptr %__b
14134  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14135  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14136  %2 = icmp sge <8 x i64> %0, %1
14137  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14138  %4 = bitcast <16 x i1> %3 to i16
14139  ret i16 %4
14140}
14141
; Masked broadcast form: signed >= compare of <8 x i64> %__a against an i64
; loaded from %__b and splatted to eight lanes, ANDed with %__u viewed as
; <8 x i1> (mask is the first AND operand here). The result is zero-padded to
; 16 lanes and returned as i16. Both run configurations expect a write-masked
; zmm vpcmpnltq with a {1to8} broadcast memory operand.
14142define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14143; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14144; VLX:       # %bb.0: # %entry
14145; VLX-NEXT:    kmovd %edi, %k1
14146; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14147; VLX-NEXT:    kmovd %k0, %eax
14148; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14149; VLX-NEXT:    vzeroupper
14150; VLX-NEXT:    retq
14151;
14152; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14153; NoVLX:       # %bb.0: # %entry
14154; NoVLX-NEXT:    kmovw %edi, %k1
14155; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14156; NoVLX-NEXT:    kmovw %k0, %eax
14157; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14158; NoVLX-NEXT:    vzeroupper
14159; NoVLX-NEXT:    retq
14160entry:
14161  %0 = bitcast <8 x i64> %__a to <8 x i64>
14162  %load = load i64, ptr %__b
14163  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14164  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14165  %2 = icmp sge <8 x i64> %0, %1
14166  %3 = bitcast i8 %__u to <8 x i1>
14167  %4 = and <8 x i1> %3, %2
14168  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14169  %6 = bitcast <16 x i1> %5 to i16
14170  ret i16 %6
14171}
14172
14173
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to <32 x i1>; every shuffle index past lane 7 is in the 8-15 range,
; which selects the zeroinitializer operand, so only the low 8 bits of the
; returned i32 can be set.
14174define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14175; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14176; VLX:       # %bb.0: # %entry
14177; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14178; VLX-NEXT:    kmovd %k0, %eax
14179; VLX-NEXT:    vzeroupper
14180; VLX-NEXT:    retq
14181;
14182; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14183; NoVLX:       # %bb.0: # %entry
14184; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14185; NoVLX-NEXT:    kmovw %k0, %eax
14186; NoVLX-NEXT:    vzeroupper
14187; NoVLX-NEXT:    retq
14188entry:
14189  %0 = bitcast <8 x i64> %__a to <8 x i64>
14190  %1 = bitcast <8 x i64> %__b to <8 x i64>
14191  %2 = icmp sge <8 x i64> %0, %1
14192  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14193  %4 = bitcast <32 x i1> %3 to i32
14194  ret i32 %4
14195}
14196
; Signed >= compare of %__a against a full <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened to <32 x i1> with zero lanes (shuffle indices
; 8-15 select zeroinitializer) and returned as an i32.
; Both runs are checked for the load being folded into vpcmpnltq as (%rdi).
14197define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14198; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14199; VLX:       # %bb.0: # %entry
14200; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14201; VLX-NEXT:    kmovd %k0, %eax
14202; VLX-NEXT:    vzeroupper
14203; VLX-NEXT:    retq
14204;
14205; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14206; NoVLX:       # %bb.0: # %entry
14207; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14208; NoVLX-NEXT:    kmovw %k0, %eax
14209; NoVLX-NEXT:    vzeroupper
14210; NoVLX-NEXT:    retq
14211entry:
14212  %0 = bitcast <8 x i64> %__a to <8 x i64>
14213  %load = load <8 x i64>, ptr %__b
14214  %1 = bitcast <8 x i64> %load to <8 x i64>
14215  %2 = icmp sge <8 x i64> %0, %1
14216  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14217  %4 = bitcast <32 x i1> %3 to i32
14218  ret i32 %4
14219}
14220
; Masked variant: signed >= compare of two <8 x i64> register operands, ANDed
; with %__u (bitcast to <8 x i1>), then widened with zero lanes to <32 x i1>
; and returned as an i32.
; Both runs are checked for %__u being moved to %k1 and used as the write mask.
14221define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14222; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14223; VLX:       # %bb.0: # %entry
14224; VLX-NEXT:    kmovd %edi, %k1
14225; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14226; VLX-NEXT:    kmovd %k0, %eax
14227; VLX-NEXT:    vzeroupper
14228; VLX-NEXT:    retq
14229;
14230; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14231; NoVLX:       # %bb.0: # %entry
14232; NoVLX-NEXT:    kmovw %edi, %k1
14233; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14234; NoVLX-NEXT:    kmovw %k0, %eax
14235; NoVLX-NEXT:    vzeroupper
14236; NoVLX-NEXT:    retq
14237entry:
14238  %0 = bitcast <8 x i64> %__a to <8 x i64>
14239  %1 = bitcast <8 x i64> %__b to <8 x i64>
14240  %2 = icmp sge <8 x i64> %0, %1
14241  %3 = bitcast i8 %__u to <8 x i1>
14242  %4 = and <8 x i1> %2, %3
14243  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14244  %6 = bitcast <32 x i1> %5 to i32
14245  ret i32 %6
14246}
14247
; Masked variant with a memory operand: signed >= compare of %__a against a
; full <8 x i64> vector loaded from %__b, ANDed with %__u (bitcast to
; <8 x i1>), then widened with zero lanes to <32 x i1> and returned as an i32.
; Both runs are checked for a load-folded, {%k1}-masked vpcmpnltq.
14248define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14249; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14250; VLX:       # %bb.0: # %entry
14251; VLX-NEXT:    kmovd %edi, %k1
14252; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14253; VLX-NEXT:    kmovd %k0, %eax
14254; VLX-NEXT:    vzeroupper
14255; VLX-NEXT:    retq
14256;
14257; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14258; NoVLX:       # %bb.0: # %entry
14259; NoVLX-NEXT:    kmovw %edi, %k1
14260; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14261; NoVLX-NEXT:    kmovw %k0, %eax
14262; NoVLX-NEXT:    vzeroupper
14263; NoVLX-NEXT:    retq
14264entry:
14265  %0 = bitcast <8 x i64> %__a to <8 x i64>
14266  %load = load <8 x i64>, ptr %__b
14267  %1 = bitcast <8 x i64> %load to <8 x i64>
14268  %2 = icmp sge <8 x i64> %0, %1
14269  %3 = bitcast i8 %__u to <8 x i1>
14270  %4 = and <8 x i1> %2, %3
14271  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14272  %6 = bitcast <32 x i1> %5 to i32
14273  ret i32 %6
14274}
14275
14276
; Broadcast variant: signed >= compare of %__a against a splat of the i64
; loaded from %__b (insertelement into lane 0, then an all-zero-index shuffle).
; The <8 x i1> result is widened with zero lanes to <32 x i1> and returned as
; an i32. Both runs are checked for the {1to8} broadcast memory operand.
14277define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14278; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14279; VLX:       # %bb.0: # %entry
14280; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14281; VLX-NEXT:    kmovd %k0, %eax
14282; VLX-NEXT:    vzeroupper
14283; VLX-NEXT:    retq
14284;
14285; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14286; NoVLX:       # %bb.0: # %entry
14287; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14288; NoVLX-NEXT:    kmovw %k0, %eax
14289; NoVLX-NEXT:    vzeroupper
14290; NoVLX-NEXT:    retq
14291entry:
14292  %0 = bitcast <8 x i64> %__a to <8 x i64>
14293  %load = load i64, ptr %__b
14294  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14295  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14296  %2 = icmp sge <8 x i64> %0, %1
14297  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14298  %4 = bitcast <32 x i1> %3 to i32
14299  ret i32 %4
14300}
14301
; Masked broadcast variant: signed >= compare of %__a against a splat of the
; i64 loaded from %__b, ANDed with %__u (bitcast to <8 x i1>), then widened
; with zero lanes to <32 x i1> and returned as an i32.
; Both runs are checked for a {1to8} broadcast operand and a {%k1} write mask.
14302define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14303; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14304; VLX:       # %bb.0: # %entry
14305; VLX-NEXT:    kmovd %edi, %k1
14306; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14307; VLX-NEXT:    kmovd %k0, %eax
14308; VLX-NEXT:    vzeroupper
14309; VLX-NEXT:    retq
14310;
14311; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14312; NoVLX:       # %bb.0: # %entry
14313; NoVLX-NEXT:    kmovw %edi, %k1
14314; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14315; NoVLX-NEXT:    kmovw %k0, %eax
14316; NoVLX-NEXT:    vzeroupper
14317; NoVLX-NEXT:    retq
14318entry:
14319  %0 = bitcast <8 x i64> %__a to <8 x i64>
14320  %load = load i64, ptr %__b
14321  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14322  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14323  %2 = icmp sge <8 x i64> %0, %1
14324  %3 = bitcast i8 %__u to <8 x i1>
14325  %4 = and <8 x i1> %3, %2
14326  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14327  %6 = bitcast <32 x i1> %5 to i32
14328  ret i32 %6
14329}
14330
14331
; Signed >= compare of two <8 x i64> register operands, widened to <64 x i1>
; and returned as an i64. Shuffle indices 8-15 select the zeroinitializer
; operand, so only the low 8 bits of the result can be set.
; The VLX run is checked for a kmovq into %rax, the NoVLX run for kmovw into %eax.
14332define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14333; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14334; VLX:       # %bb.0: # %entry
14335; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14336; VLX-NEXT:    kmovq %k0, %rax
14337; VLX-NEXT:    vzeroupper
14338; VLX-NEXT:    retq
14339;
14340; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14341; NoVLX:       # %bb.0: # %entry
14342; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14343; NoVLX-NEXT:    kmovw %k0, %eax
14344; NoVLX-NEXT:    vzeroupper
14345; NoVLX-NEXT:    retq
14346entry:
14347  %0 = bitcast <8 x i64> %__a to <8 x i64>
14348  %1 = bitcast <8 x i64> %__b to <8 x i64>
14349  %2 = icmp sge <8 x i64> %0, %1
14350  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14351  %4 = bitcast <64 x i1> %3 to i64
14352  ret i64 %4
14353}
14354
; Signed >= compare of %__a against a full <8 x i64> vector loaded from %__b,
; widened with zero lanes to <64 x i1> (shuffle indices 8-15 select
; zeroinitializer) and returned as an i64.
; Both runs are checked for the load being folded into vpcmpnltq as (%rdi).
14355define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14356; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14357; VLX:       # %bb.0: # %entry
14358; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14359; VLX-NEXT:    kmovq %k0, %rax
14360; VLX-NEXT:    vzeroupper
14361; VLX-NEXT:    retq
14362;
14363; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14364; NoVLX:       # %bb.0: # %entry
14365; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14366; NoVLX-NEXT:    kmovw %k0, %eax
14367; NoVLX-NEXT:    vzeroupper
14368; NoVLX-NEXT:    retq
14369entry:
14370  %0 = bitcast <8 x i64> %__a to <8 x i64>
14371  %load = load <8 x i64>, ptr %__b
14372  %1 = bitcast <8 x i64> %load to <8 x i64>
14373  %2 = icmp sge <8 x i64> %0, %1
14374  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14375  %4 = bitcast <64 x i1> %3 to i64
14376  ret i64 %4
14377}
14378
; Masked variant: signed >= compare of two <8 x i64> register operands, ANDed
; with %__u (bitcast to <8 x i1>), then widened with zero lanes to <64 x i1>
; and returned as an i64.
; Both runs are checked for %__u being moved to %k1 and used as the write mask.
14379define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14380; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14381; VLX:       # %bb.0: # %entry
14382; VLX-NEXT:    kmovd %edi, %k1
14383; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14384; VLX-NEXT:    kmovq %k0, %rax
14385; VLX-NEXT:    vzeroupper
14386; VLX-NEXT:    retq
14387;
14388; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14389; NoVLX:       # %bb.0: # %entry
14390; NoVLX-NEXT:    kmovw %edi, %k1
14391; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14392; NoVLX-NEXT:    kmovw %k0, %eax
14393; NoVLX-NEXT:    vzeroupper
14394; NoVLX-NEXT:    retq
14395entry:
14396  %0 = bitcast <8 x i64> %__a to <8 x i64>
14397  %1 = bitcast <8 x i64> %__b to <8 x i64>
14398  %2 = icmp sge <8 x i64> %0, %1
14399  %3 = bitcast i8 %__u to <8 x i1>
14400  %4 = and <8 x i1> %2, %3
14401  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14402  %6 = bitcast <64 x i1> %5 to i64
14403  ret i64 %6
14404}
14405
; Masked variant with a memory operand: signed >= compare of %__a against a
; full <8 x i64> vector loaded from %__b, ANDed with %__u (bitcast to
; <8 x i1>), then widened with zero lanes to <64 x i1> and returned as an i64.
; Both runs are checked for a load-folded, {%k1}-masked vpcmpnltq.
14406define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14407; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14408; VLX:       # %bb.0: # %entry
14409; VLX-NEXT:    kmovd %edi, %k1
14410; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14411; VLX-NEXT:    kmovq %k0, %rax
14412; VLX-NEXT:    vzeroupper
14413; VLX-NEXT:    retq
14414;
14415; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14416; NoVLX:       # %bb.0: # %entry
14417; NoVLX-NEXT:    kmovw %edi, %k1
14418; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14419; NoVLX-NEXT:    kmovw %k0, %eax
14420; NoVLX-NEXT:    vzeroupper
14421; NoVLX-NEXT:    retq
14422entry:
14423  %0 = bitcast <8 x i64> %__a to <8 x i64>
14424  %load = load <8 x i64>, ptr %__b
14425  %1 = bitcast <8 x i64> %load to <8 x i64>
14426  %2 = icmp sge <8 x i64> %0, %1
14427  %3 = bitcast i8 %__u to <8 x i1>
14428  %4 = and <8 x i1> %2, %3
14429  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14430  %6 = bitcast <64 x i1> %5 to i64
14431  ret i64 %6
14432}
14433
14434
; Broadcast variant: signed >= compare of %__a against a splat of the i64
; loaded from %__b (insertelement into lane 0, then an all-zero-index shuffle).
; The <8 x i1> result is widened with zero lanes to <64 x i1> and returned as
; an i64. Both runs are checked for the {1to8} broadcast memory operand.
14435define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14436; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14437; VLX:       # %bb.0: # %entry
14438; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14439; VLX-NEXT:    kmovq %k0, %rax
14440; VLX-NEXT:    vzeroupper
14441; VLX-NEXT:    retq
14442;
14443; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14444; NoVLX:       # %bb.0: # %entry
14445; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14446; NoVLX-NEXT:    kmovw %k0, %eax
14447; NoVLX-NEXT:    vzeroupper
14448; NoVLX-NEXT:    retq
14449entry:
14450  %0 = bitcast <8 x i64> %__a to <8 x i64>
14451  %load = load i64, ptr %__b
14452  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14453  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14454  %2 = icmp sge <8 x i64> %0, %1
14455  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14456  %4 = bitcast <64 x i1> %3 to i64
14457  ret i64 %4
14458}
14459
; Masked broadcast variant: signed >= compare of %__a against a splat of the
; i64 loaded from %__b, ANDed with %__u (bitcast to <8 x i1>), then widened
; with zero lanes to <64 x i1> and returned as an i64.
; Both runs are checked for a {1to8} broadcast operand and a {%k1} write mask.
14460define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14461; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14462; VLX:       # %bb.0: # %entry
14463; VLX-NEXT:    kmovd %edi, %k1
14464; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14465; VLX-NEXT:    kmovq %k0, %rax
14466; VLX-NEXT:    vzeroupper
14467; VLX-NEXT:    retq
14468;
14469; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14470; NoVLX:       # %bb.0: # %entry
14471; NoVLX-NEXT:    kmovw %edi, %k1
14472; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14473; NoVLX-NEXT:    kmovw %k0, %eax
14474; NoVLX-NEXT:    vzeroupper
14475; NoVLX-NEXT:    retq
14476entry:
14477  %0 = bitcast <8 x i64> %__a to <8 x i64>
14478  %load = load i64, ptr %__b
14479  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14480  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14481  %2 = icmp sge <8 x i64> %0, %1
14482  %3 = bitcast i8 %__u to <8 x i1>
14483  %4 = and <8 x i1> %3, %2
14484  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14485  %6 = bitcast <64 x i1> %5 to i64
14486  ret i64 %6
14487}
14488
14489
; Unsigned < compare of two <16 x i8> vectors (bitcast from <2 x i64>). The
; <16 x i1> result is widened to <32 x i1>; shuffle indices 16-31 select the
; zeroinitializer operand, so only the low 16 bits of the i32 can be set.
; The VLX run is checked for a single vpcmpltub into a mask register; the
; NoVLX run for a vpmaxub + vpcmpeqb + vpternlogq sequence that is then turned
; into a mask with vpmovsxbd + vptestmd.
14490define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14491; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14492; VLX:       # %bb.0: # %entry
14493; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0
14494; VLX-NEXT:    kmovd %k0, %eax
14495; VLX-NEXT:    retq
14496;
14497; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14498; NoVLX:       # %bb.0: # %entry
14499; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14500; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14501; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14502; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14503; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14504; NoVLX-NEXT:    kmovw %k0, %eax
14505; NoVLX-NEXT:    vzeroupper
14506; NoVLX-NEXT:    retq
14507entry:
14508  %0 = bitcast <2 x i64> %__a to <16 x i8>
14509  %1 = bitcast <2 x i64> %__b to <16 x i8>
14510  %2 = icmp ult <16 x i8> %0, %1
14511  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14512  %4 = bitcast <32 x i1> %3 to i32
14513  ret i32 %4
14514}
14515
; Unsigned < compare of %__a (as <16 x i8>) against a <2 x i64> vector loaded
; from %__b and bitcast to <16 x i8>. The <16 x i1> result is widened with
; zero lanes to <32 x i1> and returned as an i32.
; The load is checked as folded into vpcmpltub on the VLX run and into
; vpmaxub on the NoVLX run.
14516define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14517; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14518; VLX:       # %bb.0: # %entry
14519; VLX-NEXT:    vpcmpltub (%rdi), %xmm0, %k0
14520; VLX-NEXT:    kmovd %k0, %eax
14521; VLX-NEXT:    retq
14522;
14523; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14524; NoVLX:       # %bb.0: # %entry
14525; NoVLX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm1
14526; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14527; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14528; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14529; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14530; NoVLX-NEXT:    kmovw %k0, %eax
14531; NoVLX-NEXT:    vzeroupper
14532; NoVLX-NEXT:    retq
14533entry:
14534  %0 = bitcast <2 x i64> %__a to <16 x i8>
14535  %load = load <2 x i64>, ptr %__b
14536  %1 = bitcast <2 x i64> %load to <16 x i8>
14537  %2 = icmp ult <16 x i8> %0, %1
14538  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14539  %4 = bitcast <32 x i1> %3 to i32
14540  ret i32 %4
14541}
14542
; Masked variant: unsigned < compare of two <16 x i8> vectors, ANDed with
; %__u (bitcast to <16 x i1>), then widened with zero lanes to <32 x i1> and
; returned as an i32.
; The VLX run is checked for the mask applied on the compare as {%k1}; the
; NoVLX run for the mask applied afterwards with a scalar andl %edi, %eax.
14543define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14544; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14545; VLX:       # %bb.0: # %entry
14546; VLX-NEXT:    kmovd %edi, %k1
14547; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14548; VLX-NEXT:    kmovd %k0, %eax
14549; VLX-NEXT:    retq
14550;
14551; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14552; NoVLX:       # %bb.0: # %entry
14553; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14554; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14555; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14556; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14557; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14558; NoVLX-NEXT:    kmovw %k0, %eax
14559; NoVLX-NEXT:    andl %edi, %eax
14560; NoVLX-NEXT:    vzeroupper
14561; NoVLX-NEXT:    retq
14562entry:
14563  %0 = bitcast <2 x i64> %__a to <16 x i8>
14564  %1 = bitcast <2 x i64> %__b to <16 x i8>
14565  %2 = icmp ult <16 x i8> %0, %1
14566  %3 = bitcast i16 %__u to <16 x i1>
14567  %4 = and <16 x i1> %2, %3
14568  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14569  %6 = bitcast <32 x i1> %5 to i32
14570  ret i32 %6
14571}
14572
; Masked variant with a memory operand: unsigned < compare of %__a (as
; <16 x i8>) against a vector loaded from %__b, ANDed with %__u (bitcast to
; <16 x i1>), then widened with zero lanes to <32 x i1> and returned as an i32.
; The VLX run is checked for a load-folded, {%k1}-masked vpcmpltub; the NoVLX
; run for the load folded into vpmaxub and the mask applied with a scalar andl.
14573define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14574; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14575; VLX:       # %bb.0: # %entry
14576; VLX-NEXT:    kmovd %edi, %k1
14577; VLX-NEXT:    vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14578; VLX-NEXT:    kmovd %k0, %eax
14579; VLX-NEXT:    retq
14580;
14581; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14582; NoVLX:       # %bb.0: # %entry
14583; NoVLX-NEXT:    vpmaxub (%rsi), %xmm0, %xmm1
14584; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14585; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14586; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14587; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14588; NoVLX-NEXT:    kmovw %k0, %eax
14589; NoVLX-NEXT:    andl %edi, %eax
14590; NoVLX-NEXT:    vzeroupper
14591; NoVLX-NEXT:    retq
14592entry:
14593  %0 = bitcast <2 x i64> %__a to <16 x i8>
14594  %load = load <2 x i64>, ptr %__b
14595  %1 = bitcast <2 x i64> %load to <16 x i8>
14596  %2 = icmp ult <16 x i8> %0, %1
14597  %3 = bitcast i16 %__u to <16 x i1>
14598  %4 = and <16 x i1> %2, %3
14599  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14600  %6 = bitcast <32 x i1> %5 to i32
14601  ret i32 %6
14602}
14603
14604
; Unsigned < compare of two <16 x i8> vectors (bitcast from <2 x i64>),
; widened to <64 x i1> and returned as an i64. Shuffle indices 16-31 select
; the zeroinitializer operand, so only the low 16 bits of the result can be set.
; The VLX run is checked for vpcmpltub + kmovq; the NoVLX run for the
; vpmaxub + vpcmpeqb + vpternlogq sequence followed by vpmovsxbd + vptestmd.
14605define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14606; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14607; VLX:       # %bb.0: # %entry
14608; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0
14609; VLX-NEXT:    kmovq %k0, %rax
14610; VLX-NEXT:    retq
14611;
14612; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14613; NoVLX:       # %bb.0: # %entry
14614; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14615; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14616; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14617; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14618; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14619; NoVLX-NEXT:    kmovw %k0, %eax
14620; NoVLX-NEXT:    vzeroupper
14621; NoVLX-NEXT:    retq
14622entry:
14623  %0 = bitcast <2 x i64> %__a to <16 x i8>
14624  %1 = bitcast <2 x i64> %__b to <16 x i8>
14625  %2 = icmp ult <16 x i8> %0, %1
14626  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14627  %4 = bitcast <64 x i1> %3 to i64
14628  ret i64 %4
14629}
14630
; Unsigned < compare of %__a (as <16 x i8>) against a <2 x i64> vector loaded
; from %__b and bitcast to <16 x i8>. The <16 x i1> result is widened with
; zero lanes to <64 x i1> and returned as an i64.
; The load is checked as folded into vpcmpltub on the VLX run and into
; vpmaxub on the NoVLX run.
14631define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14632; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14633; VLX:       # %bb.0: # %entry
14634; VLX-NEXT:    vpcmpltub (%rdi), %xmm0, %k0
14635; VLX-NEXT:    kmovq %k0, %rax
14636; VLX-NEXT:    retq
14637;
14638; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14639; NoVLX:       # %bb.0: # %entry
14640; NoVLX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm1
14641; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14642; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14643; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14644; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14645; NoVLX-NEXT:    kmovw %k0, %eax
14646; NoVLX-NEXT:    vzeroupper
14647; NoVLX-NEXT:    retq
14648entry:
14649  %0 = bitcast <2 x i64> %__a to <16 x i8>
14650  %load = load <2 x i64>, ptr %__b
14651  %1 = bitcast <2 x i64> %load to <16 x i8>
14652  %2 = icmp ult <16 x i8> %0, %1
14653  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14654  %4 = bitcast <64 x i1> %3 to i64
14655  ret i64 %4
14656}
14657
; Masked variant: unsigned < compare of two <16 x i8> vectors, ANDed with
; %__u (bitcast to <16 x i1>), then widened with zero lanes to <64 x i1> and
; returned as an i64.
; The VLX run is checked for the mask applied on the compare as {%k1}; the
; NoVLX run for the mask applied afterwards with a scalar andl %edi, %eax.
14658define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14659; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14660; VLX:       # %bb.0: # %entry
14661; VLX-NEXT:    kmovd %edi, %k1
14662; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14663; VLX-NEXT:    kmovq %k0, %rax
14664; VLX-NEXT:    retq
14665;
14666; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14667; NoVLX:       # %bb.0: # %entry
14668; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14669; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14670; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14671; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14672; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14673; NoVLX-NEXT:    kmovw %k0, %eax
14674; NoVLX-NEXT:    andl %edi, %eax
14675; NoVLX-NEXT:    vzeroupper
14676; NoVLX-NEXT:    retq
14677entry:
14678  %0 = bitcast <2 x i64> %__a to <16 x i8>
14679  %1 = bitcast <2 x i64> %__b to <16 x i8>
14680  %2 = icmp ult <16 x i8> %0, %1
14681  %3 = bitcast i16 %__u to <16 x i1>
14682  %4 = and <16 x i1> %2, %3
14683  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14684  %6 = bitcast <64 x i1> %5 to i64
14685  ret i64 %6
14686}
14687
; Masked variant with a memory operand: unsigned < compare of %__a (as
; <16 x i8>) against a vector loaded from %__b, ANDed with %__u (bitcast to
; <16 x i1>), then widened with zero lanes to <64 x i1> and returned as an i64.
; The VLX run is checked for a load-folded, {%k1}-masked vpcmpltub; the NoVLX
; run for the load folded into vpmaxub and the mask applied with a scalar andl.
14688define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14689; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14690; VLX:       # %bb.0: # %entry
14691; VLX-NEXT:    kmovd %edi, %k1
14692; VLX-NEXT:    vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14693; VLX-NEXT:    kmovq %k0, %rax
14694; VLX-NEXT:    retq
14695;
14696; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14697; NoVLX:       # %bb.0: # %entry
14698; NoVLX-NEXT:    vpmaxub (%rsi), %xmm0, %xmm1
14699; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14700; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14701; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14702; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14703; NoVLX-NEXT:    kmovw %k0, %eax
14704; NoVLX-NEXT:    andl %edi, %eax
14705; NoVLX-NEXT:    vzeroupper
14706; NoVLX-NEXT:    retq
14707entry:
14708  %0 = bitcast <2 x i64> %__a to <16 x i8>
14709  %load = load <2 x i64>, ptr %__b
14710  %1 = bitcast <2 x i64> %load to <16 x i8>
14711  %2 = icmp ult <16 x i8> %0, %1
14712  %3 = bitcast i16 %__u to <16 x i1>
14713  %4 = and <16 x i1> %2, %3
14714  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14715  %6 = bitcast <64 x i1> %5 to i64
14716  ret i64 %6
14717}
14718
14719
; Unsigned < compare of two <32 x i8> vectors (bitcast from <4 x i64>),
; widened to <64 x i1> and returned as an i64. Shuffle indices 32-63 select
; the zeroinitializer operand, so only the low 32 bits of the result can be set.
; The VLX run is checked for a single ymm vpcmpltub. The NoVLX run is checked
; for the two 128-bit halves being turned into 16-bit masks separately
; (vpmovsxbd + vptestmd each) and merged with shll $16 / orl.
14720define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14721; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14722; VLX:       # %bb.0: # %entry
14723; VLX-NEXT:    vpcmpltub %ymm1, %ymm0, %k0
14724; VLX-NEXT:    kmovq %k0, %rax
14725; VLX-NEXT:    vzeroupper
14726; VLX-NEXT:    retq
14727;
14728; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14729; NoVLX:       # %bb.0: # %entry
14730; NoVLX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm1
14731; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
14732; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14733; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
14734; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
14735; NoVLX-NEXT:    kmovw %k0, %ecx
14736; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
14737; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14738; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14739; NoVLX-NEXT:    kmovw %k0, %eax
14740; NoVLX-NEXT:    shll $16, %eax
14741; NoVLX-NEXT:    orl %ecx, %eax
14742; NoVLX-NEXT:    vzeroupper
14743; NoVLX-NEXT:    retq
14744entry:
14745  %0 = bitcast <4 x i64> %__a to <32 x i8>
14746  %1 = bitcast <4 x i64> %__b to <32 x i8>
14747  %2 = icmp ult <32 x i8> %0, %1
14748  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14749  %4 = bitcast <64 x i1> %3 to i64
14750  ret i64 %4
14751}
14752
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b, both
; viewed as <32 x i8>; the 32-lane result is zero-widened to 64 lanes and
; returned as i64. The expected assembly folds the load into the first vector
; instruction (vpcmpltub with AVX512BW+VL, vpmaxub with only AVX512F).
14753define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
14754; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14755; VLX:       # %bb.0: # %entry
14756; VLX-NEXT:    vpcmpltub (%rdi), %ymm0, %k0
14757; VLX-NEXT:    kmovq %k0, %rax
14758; VLX-NEXT:    vzeroupper
14759; VLX-NEXT:    retq
14760;
14761; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14762; NoVLX:       # %bb.0: # %entry
14763; NoVLX-NEXT:    vpmaxub (%rdi), %ymm0, %ymm1
14764; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
14765; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14766; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
14767; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
14768; NoVLX-NEXT:    kmovw %k0, %ecx
14769; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
14770; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14771; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14772; NoVLX-NEXT:    kmovw %k0, %eax
14773; NoVLX-NEXT:    shll $16, %eax
14774; NoVLX-NEXT:    orl %ecx, %eax
14775; NoVLX-NEXT:    vzeroupper
14776; NoVLX-NEXT:    retq
14777entry:
14778  %0 = bitcast <4 x i64> %__a to <32 x i8>
14779  %load = load <4 x i64>, ptr %__b
14780  %1 = bitcast <4 x i64> %load to <32 x i8>
14781  %2 = icmp ult <32 x i8> %0, %1
  ; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so
  ; result lanes 32..63 are always zero.
14782  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14783  %4 = bitcast <64 x i1> %3 to i64
14784  ret i64 %4
14785}
14786
; Masked unsigned less-than compare of two <32 x i8> vectors. The compare
; result is ANDed with %__u (one bit per lane), zero-widened to 64 lanes and
; returned as i64. With AVX512BW+VL the expected code moves %edi into %k1 and
; uses it as the write-mask of vpcmpltub; with only AVX512F each 16-lane half of
; the emulated compare is ANDed with the matching half of %edi in GPRs.
14787define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14788; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14789; VLX:       # %bb.0: # %entry
14790; VLX-NEXT:    kmovd %edi, %k1
14791; VLX-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1}
14792; VLX-NEXT:    kmovq %k0, %rax
14793; VLX-NEXT:    vzeroupper
14794; VLX-NEXT:    retq
14795;
14796; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14797; NoVLX:       # %bb.0: # %entry
14798; NoVLX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm1
14799; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
14800; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14801; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
14802; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
14803; NoVLX-NEXT:    kmovw %k0, %eax
14804; NoVLX-NEXT:    andl %edi, %eax
14805; NoVLX-NEXT:    shrl $16, %edi
14806; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
14807; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14808; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14809; NoVLX-NEXT:    kmovw %k0, %ecx
14810; NoVLX-NEXT:    andl %edi, %ecx
14811; NoVLX-NEXT:    shll $16, %ecx
14812; NoVLX-NEXT:    movzwl %ax, %eax
14813; NoVLX-NEXT:    orl %ecx, %eax
14814; NoVLX-NEXT:    vzeroupper
14815; NoVLX-NEXT:    retq
14816entry:
14817  %0 = bitcast <4 x i64> %__a to <32 x i8>
14818  %1 = bitcast <4 x i64> %__b to <32 x i8>
14819  %2 = icmp ult <32 x i8> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
14820  %3 = bitcast i32 %__u to <32 x i1>
14821  %4 = and <32 x i1> %2, %3
  ; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so
  ; result lanes 32..63 are always zero.
14822  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14823  %6 = bitcast <64 x i1> %5 to i64
14824  ret i64 %6
14825}
14826
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b, both viewed as <32 x i8>. The result is ANDed with %__u, zero-widened
; to 64 lanes and returned as i64. In the expected assembly the mask arrives in
; %edi and the pointer in %rsi, with the load folded into the first vector
; instruction.
14827define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
14828; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14829; VLX:       # %bb.0: # %entry
14830; VLX-NEXT:    kmovd %edi, %k1
14831; VLX-NEXT:    vpcmpltub (%rsi), %ymm0, %k0 {%k1}
14832; VLX-NEXT:    kmovq %k0, %rax
14833; VLX-NEXT:    vzeroupper
14834; VLX-NEXT:    retq
14835;
14836; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14837; NoVLX:       # %bb.0: # %entry
14838; NoVLX-NEXT:    vpmaxub (%rsi), %ymm0, %ymm1
14839; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
14840; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14841; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
14842; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
14843; NoVLX-NEXT:    kmovw %k0, %eax
14844; NoVLX-NEXT:    andl %edi, %eax
14845; NoVLX-NEXT:    shrl $16, %edi
14846; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
14847; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14848; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14849; NoVLX-NEXT:    kmovw %k0, %ecx
14850; NoVLX-NEXT:    andl %edi, %ecx
14851; NoVLX-NEXT:    shll $16, %ecx
14852; NoVLX-NEXT:    movzwl %ax, %eax
14853; NoVLX-NEXT:    orl %ecx, %eax
14854; NoVLX-NEXT:    vzeroupper
14855; NoVLX-NEXT:    retq
14856entry:
14857  %0 = bitcast <4 x i64> %__a to <32 x i8>
14858  %load = load <4 x i64>, ptr %__b
14859  %1 = bitcast <4 x i64> %load to <32 x i8>
14860  %2 = icmp ult <32 x i8> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
14861  %3 = bitcast i32 %__u to <32 x i1>
14862  %4 = and <32 x i1> %2, %3
  ; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so
  ; result lanes 32..63 are always zero.
14863  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14864  %6 = bitcast <64 x i1> %5 to i64
14865  ret i64 %6
14866}
14867
14868
; Unsigned less-than compare of two <8 x i16> vectors (bitcast from <2 x i64>).
; The 8-lane i1 result is widened to 16 lanes with zeros and returned as i16.
; With AVX512BW+VL the expected code is a single vpcmpltuw into a mask register;
; with only AVX512F it is emulated as not(a == umax(a, b)), sign-extended to
; 64-bit elements and converted to a mask with vptestmq.
14869define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14870; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14871; VLX:       # %bb.0: # %entry
14872; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
14873; VLX-NEXT:    kmovd %k0, %eax
14874; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14875; VLX-NEXT:    retq
14876;
14877; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14878; NoVLX:       # %bb.0: # %entry
14879; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
14880; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
14881; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14882; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
14883; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
14884; NoVLX-NEXT:    kmovw %k0, %eax
14885; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14886; NoVLX-NEXT:    vzeroupper
14887; NoVLX-NEXT:    retq
14888entry:
14889  %0 = bitcast <2 x i64> %__a to <8 x i16>
14890  %1 = bitcast <2 x i64> %__b to <8 x i16>
14891  %2 = icmp ult <8 x i16> %0, %1
  ; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so
  ; result lanes 8..15 are always zero.
14892  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14893  %4 = bitcast <16 x i1> %3 to i16
14894  ret i16 %4
14895}
14896
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b, both
; viewed as <8 x i16>; the 8-lane result is zero-widened to 16 lanes and
; returned as i16. The expected assembly folds the load into the first vector
; instruction (vpcmpltuw with AVX512BW+VL, vpmaxuw with only AVX512F).
14897define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14898; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14899; VLX:       # %bb.0: # %entry
14900; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
14901; VLX-NEXT:    kmovd %k0, %eax
14902; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14903; VLX-NEXT:    retq
14904;
14905; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14906; NoVLX:       # %bb.0: # %entry
14907; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
14908; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
14909; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14910; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
14911; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
14912; NoVLX-NEXT:    kmovw %k0, %eax
14913; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14914; NoVLX-NEXT:    vzeroupper
14915; NoVLX-NEXT:    retq
14916entry:
14917  %0 = bitcast <2 x i64> %__a to <8 x i16>
14918  %load = load <2 x i64>, ptr %__b
14919  %1 = bitcast <2 x i64> %load to <8 x i16>
14920  %2 = icmp ult <8 x i16> %0, %1
  ; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so
  ; result lanes 8..15 are always zero.
14921  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14922  %4 = bitcast <16 x i1> %3 to i16
14923  ret i16 %4
14924}
14925
; Masked unsigned less-than compare of two <8 x i16> vectors. The compare
; result is ANDed with %__u (one bit per lane), zero-widened to 16 lanes and
; returned as i16. In both expected sequences %edi is moved into %k1 and used
; as a write-mask: on vpcmpltuw with AVX512BW+VL, and on the final vptestmq of
; the emulated compare with only AVX512F.
14926define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14927; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14928; VLX:       # %bb.0: # %entry
14929; VLX-NEXT:    kmovd %edi, %k1
14930; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
14931; VLX-NEXT:    kmovd %k0, %eax
14932; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14933; VLX-NEXT:    retq
14934;
14935; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14936; NoVLX:       # %bb.0: # %entry
14937; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
14938; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
14939; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14940; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
14941; NoVLX-NEXT:    kmovw %edi, %k1
14942; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
14943; NoVLX-NEXT:    kmovw %k0, %eax
14944; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14945; NoVLX-NEXT:    vzeroupper
14946; NoVLX-NEXT:    retq
14947entry:
14948  %0 = bitcast <2 x i64> %__a to <8 x i16>
14949  %1 = bitcast <2 x i64> %__b to <8 x i16>
14950  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
14951  %3 = bitcast i8 %__u to <8 x i1>
14952  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so
  ; result lanes 8..15 are always zero.
14953  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14954  %6 = bitcast <16 x i1> %5 to i16
14955  ret i16 %6
14956}
14957
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, both viewed as <8 x i16>. The result is ANDed with %__u, zero-widened
; to 16 lanes and returned as i16. In the expected assembly the mask arrives in
; %edi and the pointer in %rsi, with the load folded into the first vector
; instruction.
14958define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14959; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14960; VLX:       # %bb.0: # %entry
14961; VLX-NEXT:    kmovd %edi, %k1
14962; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
14963; VLX-NEXT:    kmovd %k0, %eax
14964; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14965; VLX-NEXT:    retq
14966;
14967; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14968; NoVLX:       # %bb.0: # %entry
14969; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
14970; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
14971; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14972; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
14973; NoVLX-NEXT:    kmovw %edi, %k1
14974; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
14975; NoVLX-NEXT:    kmovw %k0, %eax
14976; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14977; NoVLX-NEXT:    vzeroupper
14978; NoVLX-NEXT:    retq
14979entry:
14980  %0 = bitcast <2 x i64> %__a to <8 x i16>
14981  %load = load <2 x i64>, ptr %__b
14982  %1 = bitcast <2 x i64> %load to <8 x i16>
14983  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
14984  %3 = bitcast i8 %__u to <8 x i1>
14985  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so
  ; result lanes 8..15 are always zero.
14986  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14987  %6 = bitcast <16 x i1> %5 to i16
14988  ret i16 %6
14989}
14990
14991
; Unsigned less-than compare of two <8 x i16> vectors (bitcast from <2 x i64>).
; The 8-lane i1 result is widened to 32 lanes with zeros and returned as i32.
; With AVX512BW+VL the expected code is a single vpcmpltuw into a mask register;
; with only AVX512F it is emulated as not(a == umax(a, b)), sign-extended to
; 64-bit elements and converted to a mask with vptestmq.
14992define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14993; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
14994; VLX:       # %bb.0: # %entry
14995; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
14996; VLX-NEXT:    kmovd %k0, %eax
14997; VLX-NEXT:    retq
14998;
14999; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15000; NoVLX:       # %bb.0: # %entry
15001; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15002; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15003; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15004; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15005; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15006; NoVLX-NEXT:    kmovw %k0, %eax
15007; NoVLX-NEXT:    vzeroupper
15008; NoVLX-NEXT:    retq
15009entry:
15010  %0 = bitcast <2 x i64> %__a to <8 x i16>
15011  %1 = bitcast <2 x i64> %__b to <8 x i16>
15012  %2 = icmp ult <8 x i16> %0, %1
  ; Only indices 0..7 refer to the compare result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..31 are always zero.
15013  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15014  %4 = bitcast <32 x i1> %3 to i32
15015  ret i32 %4
15016}
15017
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b, both
; viewed as <8 x i16>; the 8-lane result is zero-widened to 32 lanes and
; returned as i32. The expected assembly folds the load into the first vector
; instruction (vpcmpltuw with AVX512BW+VL, vpmaxuw with only AVX512F).
15018define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15019; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15020; VLX:       # %bb.0: # %entry
15021; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
15022; VLX-NEXT:    kmovd %k0, %eax
15023; VLX-NEXT:    retq
15024;
15025; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15026; NoVLX:       # %bb.0: # %entry
15027; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
15028; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15029; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15030; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15031; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15032; NoVLX-NEXT:    kmovw %k0, %eax
15033; NoVLX-NEXT:    vzeroupper
15034; NoVLX-NEXT:    retq
15035entry:
15036  %0 = bitcast <2 x i64> %__a to <8 x i16>
15037  %load = load <2 x i64>, ptr %__b
15038  %1 = bitcast <2 x i64> %load to <8 x i16>
15039  %2 = icmp ult <8 x i16> %0, %1
  ; Only indices 0..7 refer to the compare result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..31 are always zero.
15040  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15041  %4 = bitcast <32 x i1> %3 to i32
15042  ret i32 %4
15043}
15044
; Masked unsigned less-than compare of two <8 x i16> vectors. The compare
; result is ANDed with %__u (one bit per lane), zero-widened to 32 lanes and
; returned as i32. In both expected sequences %edi is moved into %k1 and used
; as a write-mask: on vpcmpltuw with AVX512BW+VL, and on the final vptestmq of
; the emulated compare with only AVX512F.
15045define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15046; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15047; VLX:       # %bb.0: # %entry
15048; VLX-NEXT:    kmovd %edi, %k1
15049; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15050; VLX-NEXT:    kmovd %k0, %eax
15051; VLX-NEXT:    retq
15052;
15053; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15054; NoVLX:       # %bb.0: # %entry
15055; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15056; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15057; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15058; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15059; NoVLX-NEXT:    kmovw %edi, %k1
15060; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15061; NoVLX-NEXT:    kmovw %k0, %eax
15062; NoVLX-NEXT:    vzeroupper
15063; NoVLX-NEXT:    retq
15064entry:
15065  %0 = bitcast <2 x i64> %__a to <8 x i16>
15066  %1 = bitcast <2 x i64> %__b to <8 x i16>
15067  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15068  %3 = bitcast i8 %__u to <8 x i1>
15069  %4 = and <8 x i1> %2, %3
  ; Only indices 0..7 refer to the masked result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..31 are always zero.
15070  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15071  %6 = bitcast <32 x i1> %5 to i32
15072  ret i32 %6
15073}
15074
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, both viewed as <8 x i16>. The result is ANDed with %__u, zero-widened
; to 32 lanes and returned as i32. In the expected assembly the mask arrives in
; %edi and the pointer in %rsi, with the load folded into the first vector
; instruction.
15075define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15076; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15077; VLX:       # %bb.0: # %entry
15078; VLX-NEXT:    kmovd %edi, %k1
15079; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15080; VLX-NEXT:    kmovd %k0, %eax
15081; VLX-NEXT:    retq
15082;
15083; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15084; NoVLX:       # %bb.0: # %entry
15085; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
15086; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15087; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15088; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15089; NoVLX-NEXT:    kmovw %edi, %k1
15090; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15091; NoVLX-NEXT:    kmovw %k0, %eax
15092; NoVLX-NEXT:    vzeroupper
15093; NoVLX-NEXT:    retq
15094entry:
15095  %0 = bitcast <2 x i64> %__a to <8 x i16>
15096  %load = load <2 x i64>, ptr %__b
15097  %1 = bitcast <2 x i64> %load to <8 x i16>
15098  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15099  %3 = bitcast i8 %__u to <8 x i1>
15100  %4 = and <8 x i1> %2, %3
  ; Only indices 0..7 refer to the masked result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..31 are always zero.
15101  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15102  %6 = bitcast <32 x i1> %5 to i32
15103  ret i32 %6
15104}
15105
15106
; Unsigned less-than compare of two <8 x i16> vectors (bitcast from <2 x i64>).
; The 8-lane i1 result is widened to 64 lanes with zeros and returned as i64.
; With AVX512BW+VL the expected code is a single vpcmpltuw followed by kmovq;
; with only AVX512F the compare is emulated as not(a == umax(a, b)) and the
; mask is read with kmovw into %eax.
15107define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15108; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15109; VLX:       # %bb.0: # %entry
15110; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
15111; VLX-NEXT:    kmovq %k0, %rax
15112; VLX-NEXT:    retq
15113;
15114; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15115; NoVLX:       # %bb.0: # %entry
15116; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15117; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15118; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15119; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15120; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15121; NoVLX-NEXT:    kmovw %k0, %eax
15122; NoVLX-NEXT:    vzeroupper
15123; NoVLX-NEXT:    retq
15124entry:
15125  %0 = bitcast <2 x i64> %__a to <8 x i16>
15126  %1 = bitcast <2 x i64> %__b to <8 x i16>
15127  %2 = icmp ult <8 x i16> %0, %1
  ; Only indices 0..7 refer to the compare result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..63 are always zero.
15128  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15129  %4 = bitcast <64 x i1> %3 to i64
15130  ret i64 %4
15131}
15132
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b, both
; viewed as <8 x i16>; the 8-lane result is zero-widened to 64 lanes and
; returned as i64. The expected assembly folds the load into the first vector
; instruction (vpcmpltuw with AVX512BW+VL, vpmaxuw with only AVX512F).
15133define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15134; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15135; VLX:       # %bb.0: # %entry
15136; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
15137; VLX-NEXT:    kmovq %k0, %rax
15138; VLX-NEXT:    retq
15139;
15140; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15141; NoVLX:       # %bb.0: # %entry
15142; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
15143; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15144; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15145; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15146; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15147; NoVLX-NEXT:    kmovw %k0, %eax
15148; NoVLX-NEXT:    vzeroupper
15149; NoVLX-NEXT:    retq
15150entry:
15151  %0 = bitcast <2 x i64> %__a to <8 x i16>
15152  %load = load <2 x i64>, ptr %__b
15153  %1 = bitcast <2 x i64> %load to <8 x i16>
15154  %2 = icmp ult <8 x i16> %0, %1
  ; Only indices 0..7 refer to the compare result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..63 are always zero.
15155  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15156  %4 = bitcast <64 x i1> %3 to i64
15157  ret i64 %4
15158}
15159
; Masked unsigned less-than compare of two <8 x i16> vectors. The compare
; result is ANDed with %__u (one bit per lane), zero-widened to 64 lanes and
; returned as i64. In both expected sequences %edi is moved into %k1 and used
; as a write-mask: on vpcmpltuw with AVX512BW+VL, and on the final vptestmq of
; the emulated compare with only AVX512F.
15160define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15161; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15162; VLX:       # %bb.0: # %entry
15163; VLX-NEXT:    kmovd %edi, %k1
15164; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15165; VLX-NEXT:    kmovq %k0, %rax
15166; VLX-NEXT:    retq
15167;
15168; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15169; NoVLX:       # %bb.0: # %entry
15170; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15171; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15172; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15173; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15174; NoVLX-NEXT:    kmovw %edi, %k1
15175; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15176; NoVLX-NEXT:    kmovw %k0, %eax
15177; NoVLX-NEXT:    vzeroupper
15178; NoVLX-NEXT:    retq
15179entry:
15180  %0 = bitcast <2 x i64> %__a to <8 x i16>
15181  %1 = bitcast <2 x i64> %__b to <8 x i16>
15182  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15183  %3 = bitcast i8 %__u to <8 x i1>
15184  %4 = and <8 x i1> %2, %3
  ; Only indices 0..7 refer to the masked result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..63 are always zero.
15185  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15186  %6 = bitcast <64 x i1> %5 to i64
15187  ret i64 %6
15188}
15189
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, both viewed as <8 x i16>. The result is ANDed with %__u, zero-widened
; to 64 lanes and returned as i64. In the expected assembly the mask arrives in
; %edi and the pointer in %rsi, with the load folded into the first vector
; instruction.
15190define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15191; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15192; VLX:       # %bb.0: # %entry
15193; VLX-NEXT:    kmovd %edi, %k1
15194; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15195; VLX-NEXT:    kmovq %k0, %rax
15196; VLX-NEXT:    retq
15197;
15198; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15199; NoVLX:       # %bb.0: # %entry
15200; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
15201; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15202; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15203; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15204; NoVLX-NEXT:    kmovw %edi, %k1
15205; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15206; NoVLX-NEXT:    kmovw %k0, %eax
15207; NoVLX-NEXT:    vzeroupper
15208; NoVLX-NEXT:    retq
15209entry:
15210  %0 = bitcast <2 x i64> %__a to <8 x i16>
15211  %load = load <2 x i64>, ptr %__b
15212  %1 = bitcast <2 x i64> %load to <8 x i16>
15213  %2 = icmp ult <8 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15214  %3 = bitcast i8 %__u to <8 x i1>
15215  %4 = and <8 x i1> %2, %3
  ; Only indices 0..7 refer to the masked result; the repeated 8..15 indices
  ; all select zeroinitializer lanes, so result lanes 8..63 are always zero.
15216  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15217  %6 = bitcast <64 x i1> %5 to i64
15218  ret i64 %6
15219}
15220
15221
; Unsigned less-than compare of two <16 x i16> vectors (bitcast from
; <4 x i64>). The 16-lane i1 result is widened to 32 lanes with zeros and
; returned as i32. With AVX512BW+VL the expected code is a single vpcmpltuw
; into a mask register; with only AVX512F it is emulated as
; not(a == umax(a, b)), sign-extended to 32-bit elements and converted to a
; mask with vptestmd.
15222define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15223; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15224; VLX:       # %bb.0: # %entry
15225; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0
15226; VLX-NEXT:    kmovd %k0, %eax
15227; VLX-NEXT:    vzeroupper
15228; VLX-NEXT:    retq
15229;
15230; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15231; NoVLX:       # %bb.0: # %entry
15232; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15233; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15234; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15235; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15236; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15237; NoVLX-NEXT:    kmovw %k0, %eax
15238; NoVLX-NEXT:    vzeroupper
15239; NoVLX-NEXT:    retq
15240entry:
15241  %0 = bitcast <4 x i64> %__a to <16 x i16>
15242  %1 = bitcast <4 x i64> %__b to <16 x i16>
15243  %2 = icmp ult <16 x i16> %0, %1
  ; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so
  ; result lanes 16..31 are always zero.
15244  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15245  %4 = bitcast <32 x i1> %3 to i32
15246  ret i32 %4
15247}
15248
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b, both
; viewed as <16 x i16>; the 16-lane result is zero-widened to 32 lanes and
; returned as i32. The expected assembly folds the load into the first vector
; instruction (vpcmpltuw with AVX512BW+VL, vpmaxuw with only AVX512F).
15249define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
15250; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15251; VLX:       # %bb.0: # %entry
15252; VLX-NEXT:    vpcmpltuw (%rdi), %ymm0, %k0
15253; VLX-NEXT:    kmovd %k0, %eax
15254; VLX-NEXT:    vzeroupper
15255; VLX-NEXT:    retq
15256;
15257; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15258; NoVLX:       # %bb.0: # %entry
15259; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
15260; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15261; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15262; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15263; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15264; NoVLX-NEXT:    kmovw %k0, %eax
15265; NoVLX-NEXT:    vzeroupper
15266; NoVLX-NEXT:    retq
15267entry:
15268  %0 = bitcast <4 x i64> %__a to <16 x i16>
15269  %load = load <4 x i64>, ptr %__b
15270  %1 = bitcast <4 x i64> %load to <16 x i16>
15271  %2 = icmp ult <16 x i16> %0, %1
  ; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so
  ; result lanes 16..31 are always zero.
15272  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15273  %4 = bitcast <32 x i1> %3 to i32
15274  ret i32 %4
15275}
15276
; Masked unsigned less-than compare of two <16 x i16> vectors. The compare
; result is ANDed with %__u (one bit per lane), zero-widened to 32 lanes and
; returned as i32. With AVX512BW+VL the expected code moves %edi into %k1 and
; uses it as the write-mask of vpcmpltuw; with only AVX512F the mask is applied
; afterwards with a scalar andl on the extracted 16-bit result.
15277define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15278; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15279; VLX:       # %bb.0: # %entry
15280; VLX-NEXT:    kmovd %edi, %k1
15281; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15282; VLX-NEXT:    kmovd %k0, %eax
15283; VLX-NEXT:    vzeroupper
15284; VLX-NEXT:    retq
15285;
15286; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15287; NoVLX:       # %bb.0: # %entry
15288; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15289; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15290; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15291; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15292; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15293; NoVLX-NEXT:    kmovw %k0, %eax
15294; NoVLX-NEXT:    andl %edi, %eax
15295; NoVLX-NEXT:    vzeroupper
15296; NoVLX-NEXT:    retq
15297entry:
15298  %0 = bitcast <4 x i64> %__a to <16 x i16>
15299  %1 = bitcast <4 x i64> %__b to <16 x i16>
15300  %2 = icmp ult <16 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15301  %3 = bitcast i16 %__u to <16 x i1>
15302  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so
  ; result lanes 16..31 are always zero.
15303  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15304  %6 = bitcast <32 x i1> %5 to i32
15305  ret i32 %6
15306}
15307
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b, both viewed as <16 x i16>. The result is ANDed with %__u, zero-widened
; to 32 lanes and returned as i32. In the expected assembly the mask arrives in
; %edi and the pointer in %rsi, with the load folded into the first vector
; instruction.
15308define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
15309; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15310; VLX:       # %bb.0: # %entry
15311; VLX-NEXT:    kmovd %edi, %k1
15312; VLX-NEXT:    vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15313; VLX-NEXT:    kmovd %k0, %eax
15314; VLX-NEXT:    vzeroupper
15315; VLX-NEXT:    retq
15316;
15317; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15318; NoVLX:       # %bb.0: # %entry
15319; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15320; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15321; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15322; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15323; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15324; NoVLX-NEXT:    kmovw %k0, %eax
15325; NoVLX-NEXT:    andl %edi, %eax
15326; NoVLX-NEXT:    vzeroupper
15327; NoVLX-NEXT:    retq
15328entry:
15329  %0 = bitcast <4 x i64> %__a to <16 x i16>
15330  %load = load <4 x i64>, ptr %__b
15331  %1 = bitcast <4 x i64> %load to <16 x i16>
15332  %2 = icmp ult <16 x i16> %0, %1
  ; Reinterpret the scalar mask as one i1 per lane and AND it into the result.
15333  %3 = bitcast i16 %__u to <16 x i1>
15334  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so
  ; result lanes 16..31 are always zero.
15335  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15336  %6 = bitcast <32 x i1> %5 to i32
15337  ret i32 %6
15338}
15339
15340
; Unsigned less-than compare of two <16 x i16> vectors (bitcast from
; <4 x i64>). The 16-lane i1 result is widened to 64 lanes with zeros and
; returned as i64. With AVX512BW+VL the expected code is a single vpcmpltuw
; followed by kmovq; with only AVX512F the compare is emulated as
; not(a == umax(a, b)) and the mask is read with kmovw into %eax.
15341define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15342; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15343; VLX:       # %bb.0: # %entry
15344; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0
15345; VLX-NEXT:    kmovq %k0, %rax
15346; VLX-NEXT:    vzeroupper
15347; VLX-NEXT:    retq
15348;
15349; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15350; NoVLX:       # %bb.0: # %entry
15351; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15352; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15353; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15354; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15355; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15356; NoVLX-NEXT:    kmovw %k0, %eax
15357; NoVLX-NEXT:    vzeroupper
15358; NoVLX-NEXT:    retq
15359entry:
15360  %0 = bitcast <4 x i64> %__a to <16 x i16>
15361  %1 = bitcast <4 x i64> %__b to <16 x i16>
15362  %2 = icmp ult <16 x i16> %0, %1
  ; Only indices 0..15 refer to the compare result; the repeated 16..31
  ; indices all select zeroinitializer lanes, so result lanes 16..63 are
  ; always zero.
15363  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15364  %4 = bitcast <64 x i1> %3 to i64
15365  ret i64 %4
15366}
15367
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b, both
; viewed as <16 x i16>; the 16-lane result is zero-widened to 64 lanes and
; returned as i64. The expected assembly folds the load into the first vector
; instruction (vpcmpltuw with AVX512BW+VL, vpmaxuw with only AVX512F).
15368define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
15369; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15370; VLX:       # %bb.0: # %entry
15371; VLX-NEXT:    vpcmpltuw (%rdi), %ymm0, %k0
15372; VLX-NEXT:    kmovq %k0, %rax
15373; VLX-NEXT:    vzeroupper
15374; VLX-NEXT:    retq
15375;
15376; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15377; NoVLX:       # %bb.0: # %entry
15378; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
15379; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15380; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15381; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15382; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15383; NoVLX-NEXT:    kmovw %k0, %eax
15384; NoVLX-NEXT:    vzeroupper
15385; NoVLX-NEXT:    retq
15386entry:
15387  %0 = bitcast <4 x i64> %__a to <16 x i16>
15388  %load = load <4 x i64>, ptr %__b
15389  %1 = bitcast <4 x i64> %load to <16 x i16>
15390  %2 = icmp ult <16 x i16> %0, %1
  ; Only indices 0..15 refer to the compare result; the repeated 16..31
  ; indices all select zeroinitializer lanes, so result lanes 16..63 are
  ; always zero.
15391  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15392  %4 = bitcast <64 x i1> %3 to i64
15393  ret i64 %4
15394}
15395
; Masked variant of the 16 x i16 unsigned less-than test: the compare result is
; and-ed with %__u (i16 bitcast to <16 x i1>) before being widened to i64 with
; zeros in bits 16..63. The VLX run expects %edi moved into %k1 and used as the
; write mask of vpcmpltuw; the NoVLX run expects the mask applied afterwards in
; a general-purpose register with andl %edi, %eax.
15396define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15397; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15398; VLX:       # %bb.0: # %entry
15399; VLX-NEXT:    kmovd %edi, %k1
15400; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15401; VLX-NEXT:    kmovq %k0, %rax
15402; VLX-NEXT:    vzeroupper
15403; VLX-NEXT:    retq
15404;
15405; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15406; NoVLX:       # %bb.0: # %entry
15407; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15408; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15409; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15410; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15411; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15412; NoVLX-NEXT:    kmovw %k0, %eax
15413; NoVLX-NEXT:    andl %edi, %eax
15414; NoVLX-NEXT:    vzeroupper
15415; NoVLX-NEXT:    retq
15416entry:
15417  %0 = bitcast <4 x i64> %__a to <16 x i16>
15418  %1 = bitcast <4 x i64> %__b to <16 x i16>
15419  %2 = icmp ult <16 x i16> %0, %1
15420  %3 = bitcast i16 %__u to <16 x i1>
15421  %4 = and <16 x i1> %2, %3
15422  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15423  %6 = bitcast <64 x i1> %5 to i64
15424  ret i64 %6
15425}
15426
; Masked, memory-operand variant of the 16 x i16 unsigned less-than test: the
; right-hand side is loaded from %__b as <4 x i64>, and the compare result is
; and-ed with %__u (i16 bitcast to <16 x i1>) before being widened to i64 with
; zeros in bits 16..63. The expected code addresses the load through (%rsi) and
; takes the mask from %edi.
15427define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
15428; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15429; VLX:       # %bb.0: # %entry
15430; VLX-NEXT:    kmovd %edi, %k1
15431; VLX-NEXT:    vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15432; VLX-NEXT:    kmovq %k0, %rax
15433; VLX-NEXT:    vzeroupper
15434; VLX-NEXT:    retq
15435;
15436; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15437; NoVLX:       # %bb.0: # %entry
15438; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15439; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15440; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15441; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15442; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15443; NoVLX-NEXT:    kmovw %k0, %eax
15444; NoVLX-NEXT:    andl %edi, %eax
15445; NoVLX-NEXT:    vzeroupper
15446; NoVLX-NEXT:    retq
15447entry:
15448  %0 = bitcast <4 x i64> %__a to <16 x i16>
15449  %load = load <4 x i64>, ptr %__b
15450  %1 = bitcast <4 x i64> %load to <16 x i16>
15451  %2 = icmp ult <16 x i16> %0, %1
15452  %3 = bitcast i16 %__u to <16 x i1>
15453  %4 = and <16 x i1> %2, %3
15454  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15455  %6 = bitcast <64 x i1> %5 to i64
15456  ret i64 %6
15457}
15458
15459
; Unsigned less-than compare of two 32 x i16 vectors (bitcast from <8 x i64>),
; with the 32 result bits widened to an i64. Shuffle indices 32..63 select from
; the zeroinitializer operand, so result bits 32..63 are zero.
; The VLX run expects a single vpcmpltuw on zmm registers. The NoVLX run expects
; the compare done per 256-bit half (upper halves taken with vextracti64x4 $1),
; and the two 16-bit results joined with shll $16 / orl.
15460define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15461; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15462; VLX:       # %bb.0: # %entry
15463; VLX-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
15464; VLX-NEXT:    kmovq %k0, %rax
15465; VLX-NEXT:    vzeroupper
15466; VLX-NEXT:    retq
15467;
15468; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15469; NoVLX:       # %bb.0: # %entry
15470; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2
15471; NoVLX-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
15472; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
15473; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
15474; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
15475; NoVLX-NEXT:    kmovw %k0, %ecx
15476; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
15477; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15478; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15479; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15480; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15481; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15482; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15483; NoVLX-NEXT:    kmovw %k0, %eax
15484; NoVLX-NEXT:    shll $16, %eax
15485; NoVLX-NEXT:    orl %ecx, %eax
15486; NoVLX-NEXT:    vzeroupper
15487; NoVLX-NEXT:    retq
15488entry:
15489  %0 = bitcast <8 x i64> %__a to <32 x i16>
15490  %1 = bitcast <8 x i64> %__b to <32 x i16>
15491  %2 = icmp ult <32 x i16> %0, %1
15492  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15493  %4 = bitcast <64 x i1> %3 to i64
15494  ret i64 %4
15495}
15496
; Memory-operand variant of the 32 x i16 unsigned less-than test: the right-hand
; side is loaded from %__b as <8 x i64>. The 32 result bits are widened to i64
; with zeros in bits 32..63 (shuffle indices 32..63 select from zeroinitializer).
; The VLX run expects the load folded into vpcmpltuw as (%rdi). The NoVLX run
; expects two 256-bit halves using memory operands (%rdi) and 32(%rdi), joined
; with shll $16 / orl.
15497define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
15498; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15499; VLX:       # %bb.0: # %entry
15500; VLX-NEXT:    vpcmpltuw (%rdi), %zmm0, %k0
15501; VLX-NEXT:    kmovq %k0, %rax
15502; VLX-NEXT:    vzeroupper
15503; VLX-NEXT:    retq
15504;
15505; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15506; NoVLX:       # %bb.0: # %entry
15507; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
15508; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
15509; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
15510; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
15511; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15512; NoVLX-NEXT:    kmovw %k0, %ecx
15513; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15514; NoVLX-NEXT:    vpmaxuw 32(%rdi), %ymm0, %ymm1
15515; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15516; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15517; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15518; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15519; NoVLX-NEXT:    kmovw %k0, %eax
15520; NoVLX-NEXT:    shll $16, %eax
15521; NoVLX-NEXT:    orl %ecx, %eax
15522; NoVLX-NEXT:    vzeroupper
15523; NoVLX-NEXT:    retq
15524entry:
15525  %0 = bitcast <8 x i64> %__a to <32 x i16>
15526  %load = load <8 x i64>, ptr %__b
15527  %1 = bitcast <8 x i64> %load to <32 x i16>
15528  %2 = icmp ult <32 x i16> %0, %1
15529  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15530  %4 = bitcast <64 x i1> %3 to i64
15531  ret i64 %4
15532}
15533
; Masked variant of the 32 x i16 unsigned less-than test: the compare result is
; and-ed with %__u (i32 bitcast to <32 x i1>) before being widened to i64 with
; zeros in bits 32..63. The VLX run expects %edi moved into %k1 and used as the
; write mask of vpcmpltuw. The NoVLX run expects each 16-lane half masked in a
; general-purpose register (andl with %edi, which is shifted right by 16 for
; the upper half), with movzwl on the low result before the final orl.
15534define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15535; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15536; VLX:       # %bb.0: # %entry
15537; VLX-NEXT:    kmovd %edi, %k1
15538; VLX-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15539; VLX-NEXT:    kmovq %k0, %rax
15540; VLX-NEXT:    vzeroupper
15541; VLX-NEXT:    retq
15542;
15543; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15544; NoVLX:       # %bb.0: # %entry
15545; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2
15546; NoVLX-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
15547; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
15548; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
15549; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
15550; NoVLX-NEXT:    kmovw %k0, %eax
15551; NoVLX-NEXT:    andl %edi, %eax
15552; NoVLX-NEXT:    shrl $16, %edi
15553; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
15554; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15555; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15556; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15557; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15558; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15559; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15560; NoVLX-NEXT:    kmovw %k0, %ecx
15561; NoVLX-NEXT:    andl %edi, %ecx
15562; NoVLX-NEXT:    shll $16, %ecx
15563; NoVLX-NEXT:    movzwl %ax, %eax
15564; NoVLX-NEXT:    orl %ecx, %eax
15565; NoVLX-NEXT:    vzeroupper
15566; NoVLX-NEXT:    retq
15567entry:
15568  %0 = bitcast <8 x i64> %__a to <32 x i16>
15569  %1 = bitcast <8 x i64> %__b to <32 x i16>
15570  %2 = icmp ult <32 x i16> %0, %1
15571  %3 = bitcast i32 %__u to <32 x i1>
15572  %4 = and <32 x i1> %2, %3
15573  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15574  %6 = bitcast <64 x i1> %5 to i64
15575  ret i64 %6
15576}
15577
; Masked, memory-operand variant of the 32 x i16 unsigned less-than test: the
; right-hand side is loaded from %__b as <8 x i64>, and the compare result is
; and-ed with %__u (i32 bitcast to <32 x i1>) before being widened to i64 with
; zeros in bits 32..63. The VLX run expects a write-masked vpcmpltuw with the
; load folded as (%rsi). The NoVLX run expects two 256-bit halves using memory
; operands (%rsi) and 32(%rsi), each masked with andl against %edi.
15578define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
15579; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15580; VLX:       # %bb.0: # %entry
15581; VLX-NEXT:    kmovd %edi, %k1
15582; VLX-NEXT:    vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15583; VLX-NEXT:    kmovq %k0, %rax
15584; VLX-NEXT:    vzeroupper
15585; VLX-NEXT:    retq
15586;
15587; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15588; NoVLX:       # %bb.0: # %entry
15589; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15590; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
15591; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
15592; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
15593; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15594; NoVLX-NEXT:    kmovw %k0, %eax
15595; NoVLX-NEXT:    andl %edi, %eax
15596; NoVLX-NEXT:    shrl $16, %edi
15597; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15598; NoVLX-NEXT:    vpmaxuw 32(%rsi), %ymm0, %ymm1
15599; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15600; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15601; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15602; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15603; NoVLX-NEXT:    kmovw %k0, %ecx
15604; NoVLX-NEXT:    andl %edi, %ecx
15605; NoVLX-NEXT:    shll $16, %ecx
15606; NoVLX-NEXT:    movzwl %ax, %eax
15607; NoVLX-NEXT:    orl %ecx, %eax
15608; NoVLX-NEXT:    vzeroupper
15609; NoVLX-NEXT:    retq
15610entry:
15611  %0 = bitcast <8 x i64> %__a to <32 x i16>
15612  %load = load <8 x i64>, ptr %__b
15613  %1 = bitcast <8 x i64> %load to <32 x i16>
15614  %2 = icmp ult <32 x i16> %0, %1
15615  %3 = bitcast i32 %__u to <32 x i1>
15616  %4 = and <32 x i1> %2, %3
15617  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15618  %6 = bitcast <64 x i1> %5 to i64
15619  ret i64 %6
15620}
15621
15622
; Unsigned less-than compare of two 4 x i32 vectors (bitcast from <2 x i64>),
; with the 4 result bits widened to an i8. Shuffle indices 4..7 select from the
; zeroinitializer operand, so result bits 4..7 are zero.
; The VLX run expects vpcmpltud on xmm registers. The NoVLX run expects the
; compare on zmm registers followed by kshiftlw $12 / kshiftrw $12, which keeps
; only the low 4 bits of the 16-bit mask.
15623define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15624; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15625; VLX:       # %bb.0: # %entry
15626; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
15627; VLX-NEXT:    kmovd %k0, %eax
15628; VLX-NEXT:    # kill: def $al killed $al killed $eax
15629; VLX-NEXT:    retq
15630;
15631; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15632; NoVLX:       # %bb.0: # %entry
15633; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
15634; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15635; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15636; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15637; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15638; NoVLX-NEXT:    kmovw %k0, %eax
15639; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15640; NoVLX-NEXT:    vzeroupper
15641; NoVLX-NEXT:    retq
15642entry:
15643  %0 = bitcast <2 x i64> %__a to <4 x i32>
15644  %1 = bitcast <2 x i64> %__b to <4 x i32>
15645  %2 = icmp ult <4 x i32> %0, %1
15646  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15647  %4 = bitcast <8 x i1> %3 to i8
15648  ret i8 %4
15649}
15650
; Memory-operand variant of the 4 x i32 unsigned less-than test returning i8:
; the right-hand side is loaded from %__b as <2 x i64>. Result bits 4..7 are
; zero (shuffle indices 4..7 select from zeroinitializer).
; The VLX run expects the load folded into vpcmpltud as (%rdi). The NoVLX run
; expects a separate vmovdqa into %xmm1 followed by the zmm compare and the
; kshiftlw $12 / kshiftrw $12 pair.
15651define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15652; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15653; VLX:       # %bb.0: # %entry
15654; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
15655; VLX-NEXT:    kmovd %k0, %eax
15656; VLX-NEXT:    # kill: def $al killed $al killed $eax
15657; VLX-NEXT:    retq
15658;
15659; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15660; NoVLX:       # %bb.0: # %entry
15661; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15662; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
15663; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15664; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15665; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15666; NoVLX-NEXT:    kmovw %k0, %eax
15667; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15668; NoVLX-NEXT:    vzeroupper
15669; NoVLX-NEXT:    retq
15670entry:
15671  %0 = bitcast <2 x i64> %__a to <4 x i32>
15672  %load = load <2 x i64>, ptr %__b
15673  %1 = bitcast <2 x i64> %load to <4 x i32>
15674  %2 = icmp ult <4 x i32> %0, %1
15675  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15676  %4 = bitcast <8 x i1> %3 to i8
15677  ret i8 %4
15678}
15679
; Masked variant of the 4 x i32 unsigned less-than test returning i8: %__u is
; bitcast to <8 x i1>, its lanes 0..3 are extracted (%extract.i) and and-ed with
; the compare result before widening to 8 bits with zeros in bits 4..7.
; Both runs expect %edi moved into %k1 and used as the write mask of vpcmpltud;
; the NoVLX run additionally expects the zmm compare and the
; kshiftlw $12 / kshiftrw $12 pair.
15680define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15681; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15682; VLX:       # %bb.0: # %entry
15683; VLX-NEXT:    kmovd %edi, %k1
15684; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15685; VLX-NEXT:    kmovd %k0, %eax
15686; VLX-NEXT:    # kill: def $al killed $al killed $eax
15687; VLX-NEXT:    retq
15688;
15689; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15690; NoVLX:       # %bb.0: # %entry
15691; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
15692; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15693; NoVLX-NEXT:    kmovw %edi, %k1
15694; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15695; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15696; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15697; NoVLX-NEXT:    kmovw %k0, %eax
15698; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15699; NoVLX-NEXT:    vzeroupper
15700; NoVLX-NEXT:    retq
15701entry:
15702  %0 = bitcast <2 x i64> %__a to <4 x i32>
15703  %1 = bitcast <2 x i64> %__b to <4 x i32>
15704  %2 = icmp ult <4 x i32> %0, %1
15705  %3 = bitcast i8 %__u to <8 x i1>
15706  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15707  %4 = and <4 x i1> %2, %extract.i
15708  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15709  %6 = bitcast <8 x i1> %5 to i8
15710  ret i8 %6
15711}
15712
; Masked, memory-operand variant of the 4 x i32 unsigned less-than test
; returning i8: the right-hand side is loaded from %__b as <2 x i64>, and lanes
; 0..3 of %__u (bitcast to <8 x i1>) are and-ed with the compare result before
; widening to 8 bits with zeros in bits 4..7.
; The VLX run expects a write-masked vpcmpltud with the load folded as (%rsi);
; the NoVLX run expects a separate vmovdqa from (%rsi) into %xmm1.
15713define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15714; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15715; VLX:       # %bb.0: # %entry
15716; VLX-NEXT:    kmovd %edi, %k1
15717; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15718; VLX-NEXT:    kmovd %k0, %eax
15719; VLX-NEXT:    # kill: def $al killed $al killed $eax
15720; VLX-NEXT:    retq
15721;
15722; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15723; NoVLX:       # %bb.0: # %entry
15724; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15725; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
15726; NoVLX-NEXT:    kmovw %edi, %k1
15727; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15728; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15729; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15730; NoVLX-NEXT:    kmovw %k0, %eax
15731; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15732; NoVLX-NEXT:    vzeroupper
15733; NoVLX-NEXT:    retq
15734entry:
15735  %0 = bitcast <2 x i64> %__a to <4 x i32>
15736  %load = load <2 x i64>, ptr %__b
15737  %1 = bitcast <2 x i64> %load to <4 x i32>
15738  %2 = icmp ult <4 x i32> %0, %1
15739  %3 = bitcast i8 %__u to <8 x i1>
15740  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15741  %4 = and <4 x i1> %2, %extract.i
15742  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15743  %6 = bitcast <8 x i1> %5 to i8
15744  ret i8 %6
15745}
15746
15747
; Broadcast variant of the 4 x i32 unsigned less-than test returning i8: a
; single i32 is loaded from %__b, inserted into lane 0 and splatted to all four
; lanes with an all-zero shuffle mask before the compare. Result bits 4..7 are
; zero (shuffle indices 4..7 select from zeroinitializer).
; The VLX run expects the embedded-broadcast operand (%rdi){1to4}; the NoVLX
; run expects (%rdi){1to16} on a zmm compare plus the kshiftlw/kshiftrw pair.
15748define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15749; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15750; VLX:       # %bb.0: # %entry
15751; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
15752; VLX-NEXT:    kmovd %k0, %eax
15753; VLX-NEXT:    # kill: def $al killed $al killed $eax
15754; VLX-NEXT:    retq
15755;
15756; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15757; NoVLX:       # %bb.0: # %entry
15758; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15759; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
15760; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15761; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15762; NoVLX-NEXT:    kmovw %k0, %eax
15763; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15764; NoVLX-NEXT:    vzeroupper
15765; NoVLX-NEXT:    retq
15766entry:
15767  %0 = bitcast <2 x i64> %__a to <4 x i32>
15768  %load = load i32, ptr %__b
15769  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15770  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15771  %2 = icmp ult <4 x i32> %0, %1
15772  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15773  %4 = bitcast <8 x i1> %3 to i8
15774  ret i8 %4
15775}
15776
; Masked broadcast variant of the 4 x i32 unsigned less-than test returning i8:
; a single i32 loaded from %__b is splatted to all four lanes, compared, and the
; result is and-ed with lanes 0..3 of %__u (bitcast to <8 x i1>). Note the `and`
; takes %extract.i as its first operand here, unlike the non-broadcast masked
; tests. Result bits 4..7 are zero.
; The VLX run expects a write-masked compare against (%rsi){1to4}; the NoVLX
; run expects (%rsi){1to16} on a zmm compare plus the kshiftlw/kshiftrw pair.
15777define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15778; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15779; VLX:       # %bb.0: # %entry
15780; VLX-NEXT:    kmovd %edi, %k1
15781; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15782; VLX-NEXT:    kmovd %k0, %eax
15783; VLX-NEXT:    # kill: def $al killed $al killed $eax
15784; VLX-NEXT:    retq
15785;
15786; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15787; NoVLX:       # %bb.0: # %entry
15788; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15789; NoVLX-NEXT:    kmovw %edi, %k1
15790; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15791; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15792; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15793; NoVLX-NEXT:    kmovw %k0, %eax
15794; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15795; NoVLX-NEXT:    vzeroupper
15796; NoVLX-NEXT:    retq
15797entry:
15798  %0 = bitcast <2 x i64> %__a to <4 x i32>
15799  %load = load i32, ptr %__b
15800  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15801  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15802  %2 = icmp ult <4 x i32> %0, %1
15803  %3 = bitcast i8 %__u to <8 x i1>
15804  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15805  %4 = and <4 x i1> %extract.i, %2
15806  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15807  %6 = bitcast <8 x i1> %5 to i8
15808  ret i8 %6
15809}
15810
15811
; Unsigned less-than compare of two 4 x i32 vectors (bitcast from <2 x i64>),
; with the 4 result bits widened to an i16. Every shuffle index past the first
; four is in 4..7, which selects from the zeroinitializer operand, so result
; bits 4..15 are zero.
; The VLX run expects vpcmpltud on xmm registers. The NoVLX run expects the
; compare on zmm registers followed by kshiftlw $12 / kshiftrw $12, which keeps
; only the low 4 bits of the 16-bit mask.
15812define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15813; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15814; VLX:       # %bb.0: # %entry
15815; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
15816; VLX-NEXT:    kmovd %k0, %eax
15817; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15818; VLX-NEXT:    retq
15819;
15820; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15821; NoVLX:       # %bb.0: # %entry
15822; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
15823; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15824; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15825; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15826; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15827; NoVLX-NEXT:    kmovw %k0, %eax
15828; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15829; NoVLX-NEXT:    vzeroupper
15830; NoVLX-NEXT:    retq
15831entry:
15832  %0 = bitcast <2 x i64> %__a to <4 x i32>
15833  %1 = bitcast <2 x i64> %__b to <4 x i32>
15834  %2 = icmp ult <4 x i32> %0, %1
15835  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15836  %4 = bitcast <16 x i1> %3 to i16
15837  ret i16 %4
15838}
15839
; Memory-operand variant of the 4 x i32 unsigned less-than test returning i16:
; the right-hand side is loaded from %__b as <2 x i64>. Result bits 4..15 are
; zero (all shuffle indices past the first four select from zeroinitializer).
; The VLX run expects the load folded into vpcmpltud as (%rdi). The NoVLX run
; expects a separate vmovdqa into %xmm1 followed by the zmm compare and the
; kshiftlw $12 / kshiftrw $12 pair.
15840define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15841; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15842; VLX:       # %bb.0: # %entry
15843; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
15844; VLX-NEXT:    kmovd %k0, %eax
15845; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15846; VLX-NEXT:    retq
15847;
15848; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15849; NoVLX:       # %bb.0: # %entry
15850; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15851; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
15852; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15853; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15854; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15855; NoVLX-NEXT:    kmovw %k0, %eax
15856; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15857; NoVLX-NEXT:    vzeroupper
15858; NoVLX-NEXT:    retq
15859entry:
15860  %0 = bitcast <2 x i64> %__a to <4 x i32>
15861  %load = load <2 x i64>, ptr %__b
15862  %1 = bitcast <2 x i64> %load to <4 x i32>
15863  %2 = icmp ult <4 x i32> %0, %1
15864  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15865  %4 = bitcast <16 x i1> %3 to i16
15866  ret i16 %4
15867}
15868
; Masked variant of the 4 x i32 unsigned less-than test returning i16: %__u is
; bitcast to <8 x i1>, its lanes 0..3 are extracted (%extract.i) and and-ed with
; the compare result before widening to 16 bits with zeros in bits 4..15.
; Both runs expect %edi moved into %k1 and used as the write mask of vpcmpltud;
; the NoVLX run additionally expects the zmm compare and the
; kshiftlw $12 / kshiftrw $12 pair.
15869define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15870; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15871; VLX:       # %bb.0: # %entry
15872; VLX-NEXT:    kmovd %edi, %k1
15873; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15874; VLX-NEXT:    kmovd %k0, %eax
15875; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15876; VLX-NEXT:    retq
15877;
15878; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15879; NoVLX:       # %bb.0: # %entry
15880; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
15881; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15882; NoVLX-NEXT:    kmovw %edi, %k1
15883; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15884; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15885; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15886; NoVLX-NEXT:    kmovw %k0, %eax
15887; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15888; NoVLX-NEXT:    vzeroupper
15889; NoVLX-NEXT:    retq
15890entry:
15891  %0 = bitcast <2 x i64> %__a to <4 x i32>
15892  %1 = bitcast <2 x i64> %__b to <4 x i32>
15893  %2 = icmp ult <4 x i32> %0, %1
15894  %3 = bitcast i8 %__u to <8 x i1>
15895  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15896  %4 = and <4 x i1> %2, %extract.i
15897  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15898  %6 = bitcast <16 x i1> %5 to i16
15899  ret i16 %6
15900}
15901
; Masked, memory-operand variant of the 4 x i32 unsigned less-than test
; returning i16: the right-hand side is loaded from %__b as <2 x i64>, and lanes
; 0..3 of %__u (bitcast to <8 x i1>) are and-ed with the compare result before
; widening to 16 bits with zeros in bits 4..15.
; The VLX run expects a write-masked vpcmpltud with the load folded as (%rsi);
; the NoVLX run expects a separate vmovdqa from (%rsi) into %xmm1.
15902define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15903; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15904; VLX:       # %bb.0: # %entry
15905; VLX-NEXT:    kmovd %edi, %k1
15906; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15907; VLX-NEXT:    kmovd %k0, %eax
15908; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15909; VLX-NEXT:    retq
15910;
15911; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15912; NoVLX:       # %bb.0: # %entry
15913; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15914; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
15915; NoVLX-NEXT:    kmovw %edi, %k1
15916; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15917; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15918; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15919; NoVLX-NEXT:    kmovw %k0, %eax
15920; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15921; NoVLX-NEXT:    vzeroupper
15922; NoVLX-NEXT:    retq
15923entry:
15924  %0 = bitcast <2 x i64> %__a to <4 x i32>
15925  %load = load <2 x i64>, ptr %__b
15926  %1 = bitcast <2 x i64> %load to <4 x i32>
15927  %2 = icmp ult <4 x i32> %0, %1
15928  %3 = bitcast i8 %__u to <8 x i1>
15929  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15930  %4 = and <4 x i1> %2, %extract.i
15931  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15932  %6 = bitcast <16 x i1> %5 to i16
15933  ret i16 %6
15934}
15935
15936
; Broadcast variant of the 4 x i32 unsigned less-than test returning i16: a
; single i32 is loaded from %__b, inserted into lane 0 and splatted to all four
; lanes with an all-zero shuffle mask before the compare. Result bits 4..15 are
; zero (all shuffle indices past the first four select from zeroinitializer).
; The VLX run expects the embedded-broadcast operand (%rdi){1to4}; the NoVLX
; run expects (%rdi){1to16} on a zmm compare plus the kshiftlw/kshiftrw pair.
15937define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15938; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15939; VLX:       # %bb.0: # %entry
15940; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
15941; VLX-NEXT:    kmovd %k0, %eax
15942; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15943; VLX-NEXT:    retq
15944;
15945; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15946; NoVLX:       # %bb.0: # %entry
15947; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15948; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
15949; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15950; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15951; NoVLX-NEXT:    kmovw %k0, %eax
15952; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15953; NoVLX-NEXT:    vzeroupper
15954; NoVLX-NEXT:    retq
15955entry:
15956  %0 = bitcast <2 x i64> %__a to <4 x i32>
15957  %load = load i32, ptr %__b
15958  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15959  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15960  %2 = icmp ult <4 x i32> %0, %1
15961  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15962  %4 = bitcast <16 x i1> %3 to i16
15963  ret i16 %4
15964}
15965
; Masked broadcast variant of the 4 x i32 unsigned less-than test returning
; i16: a single i32 loaded from %__b is splatted to all four lanes, compared,
; and the result is and-ed with lanes 0..3 of %__u (bitcast to <8 x i1>). Note
; the `and` takes %extract.i as its first operand here, unlike the
; non-broadcast masked tests. Result bits 4..15 are zero.
; The VLX run expects a write-masked compare against (%rsi){1to4}; the NoVLX
; run expects (%rsi){1to16} on a zmm compare plus the kshiftlw/kshiftrw pair.
15966define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15967; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15968; VLX:       # %bb.0: # %entry
15969; VLX-NEXT:    kmovd %edi, %k1
15970; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15971; VLX-NEXT:    kmovd %k0, %eax
15972; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15973; VLX-NEXT:    retq
15974;
15975; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15976; NoVLX:       # %bb.0: # %entry
15977; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15978; NoVLX-NEXT:    kmovw %edi, %k1
15979; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15980; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15981; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15982; NoVLX-NEXT:    kmovw %k0, %eax
15983; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15984; NoVLX-NEXT:    vzeroupper
15985; NoVLX-NEXT:    retq
15986entry:
15987  %0 = bitcast <2 x i64> %__a to <4 x i32>
15988  %load = load i32, ptr %__b
15989  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15990  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15991  %2 = icmp ult <4 x i32> %0, %1
15992  %3 = bitcast i8 %__u to <8 x i1>
15993  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15994  %4 = and <4 x i1> %extract.i, %2
15995  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15996  %6 = bitcast <16 x i1> %5 to i16
15997  ret i16 %6
15998}
15999
16000
; Unsigned less-than compare of two 4 x i32 vectors (bitcast from <2 x i64>),
; with the 4 result bits widened to an i32. Every shuffle index past the first
; four is in 4..7, which selects from the zeroinitializer operand, so result
; bits 4..31 are zero.
; The VLX run expects vpcmpltud on xmm registers. The NoVLX run expects the
; compare on zmm registers followed by kshiftlw $12 / kshiftrw $12, which keeps
; only the low 4 bits of the 16-bit mask.
16001define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16002; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16003; VLX:       # %bb.0: # %entry
16004; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
16005; VLX-NEXT:    kmovd %k0, %eax
16006; VLX-NEXT:    retq
16007;
16008; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16009; NoVLX:       # %bb.0: # %entry
16010; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16011; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16012; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16013; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16014; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16015; NoVLX-NEXT:    kmovw %k0, %eax
16016; NoVLX-NEXT:    vzeroupper
16017; NoVLX-NEXT:    retq
16018entry:
16019  %0 = bitcast <2 x i64> %__a to <4 x i32>
16020  %1 = bitcast <2 x i64> %__b to <4 x i32>
16021  %2 = icmp ult <4 x i32> %0, %1
16022  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16023  %4 = bitcast <32 x i1> %3 to i32
16024  ret i32 %4
16025}
16026
; Same compare as the register form, but the second operand is a full
; <2 x i64> vector loaded from %__b. The 4 result bits are widened to i32.
; With +avx512vl the checks expect the load folded into vpcmpltud; with only
; +avx512f they expect a separate vmovdqa, a 512-bit compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16027define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16028; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16029; VLX:       # %bb.0: # %entry
16030; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
16031; VLX-NEXT:    kmovd %k0, %eax
16032; VLX-NEXT:    retq
16033;
16034; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16035; NoVLX:       # %bb.0: # %entry
16036; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16037; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
16038; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16039; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16040; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16041; NoVLX-NEXT:    kmovw %k0, %eax
16042; NoVLX-NEXT:    vzeroupper
16043; NoVLX-NEXT:    retq
16044entry:
16045  %0 = bitcast <2 x i64> %__a to <4 x i32>
16046  %load = load <2 x i64>, ptr %__b
16047  %1 = bitcast <2 x i64> %load to <4 x i32>
16048  %2 = icmp ult <4 x i32> %0, %1
  ; Shuffle indices 0-3 select the compare lanes; indices 4-7 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16049  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16050  %4 = bitcast <32 x i1> %3 to i32
16051  ret i32 %4
16052}
16053
; Masked unsigned less-than on two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, widened to 32 mask bits, and
; returned as i32.
; Both configurations are expected to move %__u into %k1 and apply it as the
; {%k1} write-mask of vpcmpltud; the avx512f-only checks additionally expect
; a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16054define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16055; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16056; VLX:       # %bb.0: # %entry
16057; VLX-NEXT:    kmovd %edi, %k1
16058; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16059; VLX-NEXT:    kmovd %k0, %eax
16060; VLX-NEXT:    retq
16061;
16062; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16063; NoVLX:       # %bb.0: # %entry
16064; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16065; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16066; NoVLX-NEXT:    kmovw %edi, %k1
16067; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16068; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16069; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16070; NoVLX-NEXT:    kmovw %k0, %eax
16071; NoVLX-NEXT:    vzeroupper
16072; NoVLX-NEXT:    retq
16073entry:
16074  %0 = bitcast <2 x i64> %__a to <4 x i32>
16075  %1 = bitcast <2 x i64> %__b to <4 x i32>
16076  %2 = icmp ult <4 x i32> %0, %1
16077  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16078  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16079  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16080  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16081  %6 = bitcast <32 x i1> %5 to i32
16082  ret i32 %6
16083}
16084
; Masked unsigned less-than where the second operand is a full <2 x i64>
; vector loaded from %__b. The compare result is ANDed with the low 4 bits of
; %__u, widened to 32 mask bits, and returned as i32.
; With +avx512vl the checks expect the load folded into the masked compare;
; with only +avx512f they expect a separate vmovdqa, a masked 512-bit compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16085define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16086; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16087; VLX:       # %bb.0: # %entry
16088; VLX-NEXT:    kmovd %edi, %k1
16089; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16090; VLX-NEXT:    kmovd %k0, %eax
16091; VLX-NEXT:    retq
16092;
16093; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16094; NoVLX:       # %bb.0: # %entry
16095; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16096; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16097; NoVLX-NEXT:    kmovw %edi, %k1
16098; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16099; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16100; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16101; NoVLX-NEXT:    kmovw %k0, %eax
16102; NoVLX-NEXT:    vzeroupper
16103; NoVLX-NEXT:    retq
16104entry:
16105  %0 = bitcast <2 x i64> %__a to <4 x i32>
16106  %load = load <2 x i64>, ptr %__b
16107  %1 = bitcast <2 x i64> %load to <4 x i32>
16108  %2 = icmp ult <4 x i32> %0, %1
16109  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16110  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16111  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16112  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16113  %6 = bitcast <32 x i1> %5 to i32
16114  ret i32 %6
16115}
16116
16117
; Unsigned less-than against a broadcast operand: a single i32 is loaded from
; %__b and splatted to all 4 lanes before the compare. The 4 result bits are
; widened to 32 mask bits and returned as i32.
; The checks expect the splat folded into an embedded-broadcast memory
; operand ({1to4} with +avx512vl, {1to16} with only +avx512f); the latter run
; also expects a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16118define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16119; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16120; VLX:       # %bb.0: # %entry
16121; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16122; VLX-NEXT:    kmovd %k0, %eax
16123; VLX-NEXT:    retq
16124;
16125; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16126; NoVLX:       # %bb.0: # %entry
16127; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16128; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
16129; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16130; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16131; NoVLX-NEXT:    kmovw %k0, %eax
16132; NoVLX-NEXT:    vzeroupper
16133; NoVLX-NEXT:    retq
16134entry:
16135  %0 = bitcast <2 x i64> %__a to <4 x i32>
16136  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16137  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16138  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16139  %2 = icmp ult <4 x i32> %0, %1
  ; Shuffle indices 0-3 select the compare lanes; indices 4-7 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16140  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16141  %4 = bitcast <32 x i1> %3 to i32
16142  ret i32 %4
16143}
16144
; Masked unsigned less-than against a broadcast operand: a single i32 loaded
; from %__b is splatted to all 4 lanes, compared, and the result is ANDed with
; the low 4 bits of %__u (mask is the first AND operand here). The 4 bits are
; widened to 32 mask bits and returned as i32.
; The checks expect %__u in %k1 as the compare write-mask and the splat folded
; into an embedded-broadcast memory operand; the avx512f-only run also
; expects a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16145define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16146; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16147; VLX:       # %bb.0: # %entry
16148; VLX-NEXT:    kmovd %edi, %k1
16149; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16150; VLX-NEXT:    kmovd %k0, %eax
16151; VLX-NEXT:    retq
16152;
16153; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16154; NoVLX:       # %bb.0: # %entry
16155; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16156; NoVLX-NEXT:    kmovw %edi, %k1
16157; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16158; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16159; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16160; NoVLX-NEXT:    kmovw %k0, %eax
16161; NoVLX-NEXT:    vzeroupper
16162; NoVLX-NEXT:    retq
16163entry:
16164  %0 = bitcast <2 x i64> %__a to <4 x i32>
16165  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16166  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16167  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16168  %2 = icmp ult <4 x i32> %0, %1
16169  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16170  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16171  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16172  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16173  %6 = bitcast <32 x i1> %5 to i32
16174  ret i32 %6
16175}
16176
16177
; Unsigned less-than (icmp ult) on two <4 x i32> register operands, with the
; 4 result bits widened to a 64-bit mask and returned as i64.
; With +avx512vl the checks expect a 128-bit vpcmpltud and a kmovq into %rax;
; with only +avx512f they expect a 512-bit compare, a kshiftlw/kshiftrw $12
; pair that clears mask bits 4-15, and a kmovw into %eax.
16178define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16179; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16180; VLX:       # %bb.0: # %entry
16181; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
16182; VLX-NEXT:    kmovq %k0, %rax
16183; VLX-NEXT:    retq
16184;
16185; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16186; NoVLX:       # %bb.0: # %entry
16187; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16188; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16189; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16190; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16191; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16192; NoVLX-NEXT:    kmovw %k0, %eax
16193; NoVLX-NEXT:    vzeroupper
16194; NoVLX-NEXT:    retq
16195entry:
16196  %0 = bitcast <2 x i64> %__a to <4 x i32>
16197  %1 = bitcast <2 x i64> %__b to <4 x i32>
16198  %2 = icmp ult <4 x i32> %0, %1
  ; Shuffle indices 0-3 select the compare lanes; indices 4-7 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16199  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16200  %4 = bitcast <64 x i1> %3 to i64
16201  ret i64 %4
16202}
16203
; Unsigned less-than where the second operand is a full <2 x i64> vector
; loaded from %__b. The 4 result bits are widened to a 64-bit mask and
; returned as i64.
; With +avx512vl the checks expect the load folded into vpcmpltud and a kmovq
; into %rax; with only +avx512f they expect a separate vmovdqa, a 512-bit
; compare, and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16204define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16205; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16206; VLX:       # %bb.0: # %entry
16207; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
16208; VLX-NEXT:    kmovq %k0, %rax
16209; VLX-NEXT:    retq
16210;
16211; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16212; NoVLX:       # %bb.0: # %entry
16213; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16214; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
16215; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16216; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16217; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16218; NoVLX-NEXT:    kmovw %k0, %eax
16219; NoVLX-NEXT:    vzeroupper
16220; NoVLX-NEXT:    retq
16221entry:
16222  %0 = bitcast <2 x i64> %__a to <4 x i32>
16223  %load = load <2 x i64>, ptr %__b
16224  %1 = bitcast <2 x i64> %load to <4 x i32>
16225  %2 = icmp ult <4 x i32> %0, %1
  ; Shuffle indices 0-3 select the compare lanes; indices 4-7 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16226  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16227  %4 = bitcast <64 x i1> %3 to i64
16228  ret i64 %4
16229}
16230
; Masked unsigned less-than on two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, widened to a 64-bit mask, and
; returned as i64.
; Both configurations are expected to move %__u into %k1 and apply it as the
; {%k1} write-mask of vpcmpltud; the avx512f-only checks additionally expect
; a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16231define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16232; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16233; VLX:       # %bb.0: # %entry
16234; VLX-NEXT:    kmovd %edi, %k1
16235; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16236; VLX-NEXT:    kmovq %k0, %rax
16237; VLX-NEXT:    retq
16238;
16239; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16240; NoVLX:       # %bb.0: # %entry
16241; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16242; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16243; NoVLX-NEXT:    kmovw %edi, %k1
16244; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16245; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16246; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16247; NoVLX-NEXT:    kmovw %k0, %eax
16248; NoVLX-NEXT:    vzeroupper
16249; NoVLX-NEXT:    retq
16250entry:
16251  %0 = bitcast <2 x i64> %__a to <4 x i32>
16252  %1 = bitcast <2 x i64> %__b to <4 x i32>
16253  %2 = icmp ult <4 x i32> %0, %1
16254  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16255  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16256  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16257  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16258  %6 = bitcast <64 x i1> %5 to i64
16259  ret i64 %6
16260}
16261
; Masked unsigned less-than where the second operand is a full <2 x i64>
; vector loaded from %__b. The compare result is ANDed with the low 4 bits of
; %__u, widened to a 64-bit mask, and returned as i64.
; With +avx512vl the checks expect the load folded into the masked compare;
; with only +avx512f they expect a separate vmovdqa, a masked 512-bit compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16262define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16263; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16264; VLX:       # %bb.0: # %entry
16265; VLX-NEXT:    kmovd %edi, %k1
16266; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16267; VLX-NEXT:    kmovq %k0, %rax
16268; VLX-NEXT:    retq
16269;
16270; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16271; NoVLX:       # %bb.0: # %entry
16272; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16273; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16274; NoVLX-NEXT:    kmovw %edi, %k1
16275; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16276; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16277; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16278; NoVLX-NEXT:    kmovw %k0, %eax
16279; NoVLX-NEXT:    vzeroupper
16280; NoVLX-NEXT:    retq
16281entry:
16282  %0 = bitcast <2 x i64> %__a to <4 x i32>
16283  %load = load <2 x i64>, ptr %__b
16284  %1 = bitcast <2 x i64> %load to <4 x i32>
16285  %2 = icmp ult <4 x i32> %0, %1
16286  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16287  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16288  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16289  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16290  %6 = bitcast <64 x i1> %5 to i64
16291  ret i64 %6
16292}
16293
16294
; Unsigned less-than against a broadcast operand: a single i32 is loaded from
; %__b and splatted to all 4 lanes before the compare. The 4 result bits are
; widened to a 64-bit mask and returned as i64.
; The checks expect the splat folded into an embedded-broadcast memory
; operand ({1to4} with +avx512vl, {1to16} with only +avx512f); the latter run
; also expects a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16295define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16296; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16297; VLX:       # %bb.0: # %entry
16298; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16299; VLX-NEXT:    kmovq %k0, %rax
16300; VLX-NEXT:    retq
16301;
16302; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16303; NoVLX:       # %bb.0: # %entry
16304; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16305; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
16306; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16307; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16308; NoVLX-NEXT:    kmovw %k0, %eax
16309; NoVLX-NEXT:    vzeroupper
16310; NoVLX-NEXT:    retq
16311entry:
16312  %0 = bitcast <2 x i64> %__a to <4 x i32>
16313  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16314  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16315  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16316  %2 = icmp ult <4 x i32> %0, %1
  ; Shuffle indices 0-3 select the compare lanes; indices 4-7 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16317  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16318  %4 = bitcast <64 x i1> %3 to i64
16319  ret i64 %4
16320}
16321
; Masked unsigned less-than against a broadcast operand: a single i32 loaded
; from %__b is splatted to all 4 lanes, compared, and the result is ANDed with
; the low 4 bits of %__u (mask is the first AND operand here). The 4 bits are
; widened to a 64-bit mask and returned as i64.
; The checks expect %__u in %k1 as the compare write-mask and the splat folded
; into an embedded-broadcast memory operand; the avx512f-only run also
; expects a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
16322define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16323; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16324; VLX:       # %bb.0: # %entry
16325; VLX-NEXT:    kmovd %edi, %k1
16326; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16327; VLX-NEXT:    kmovq %k0, %rax
16328; VLX-NEXT:    retq
16329;
16330; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16331; NoVLX:       # %bb.0: # %entry
16332; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16333; NoVLX-NEXT:    kmovw %edi, %k1
16334; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16335; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16336; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16337; NoVLX-NEXT:    kmovw %k0, %eax
16338; NoVLX-NEXT:    vzeroupper
16339; NoVLX-NEXT:    retq
16340entry:
16341  %0 = bitcast <2 x i64> %__a to <4 x i32>
16342  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16343  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16344  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16345  %2 = icmp ult <4 x i32> %0, %1
16346  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask take part in the AND below.
16347  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16348  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 0-3 select the masked compare lanes; indices 4-7 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16349  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16350  %6 = bitcast <64 x i1> %5 to i64
16351  ret i64 %6
16352}
16353
16354
; Unsigned less-than (icmp ult) on two <8 x i32> register operands, with the
; 8 result bits widened to a 16-bit mask and returned as i16.
; With +avx512vl the checks expect a 256-bit vpcmpltud writing %k0; with only
; +avx512f they expect a 512-bit compare followed by a kshiftlw/kshiftrw $8
; pair that clears mask bits 8-15.
16355define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16356; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16357; VLX:       # %bb.0: # %entry
16358; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
16359; VLX-NEXT:    kmovd %k0, %eax
16360; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16361; VLX-NEXT:    vzeroupper
16362; VLX-NEXT:    retq
16363;
16364; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16365; NoVLX:       # %bb.0: # %entry
16366; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16367; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16368; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16369; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16370; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16371; NoVLX-NEXT:    kmovw %k0, %eax
16372; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16373; NoVLX-NEXT:    vzeroupper
16374; NoVLX-NEXT:    retq
16375entry:
16376  %0 = bitcast <4 x i64> %__a to <8 x i32>
16377  %1 = bitcast <4 x i64> %__b to <8 x i32>
16378  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16379  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16380  %4 = bitcast <16 x i1> %3 to i16
16381  ret i16 %4
16382}
16383
; Unsigned less-than where the second operand is a full <4 x i64> vector
; loaded from %__b. The 8 result bits are widened to a 16-bit mask and
; returned as i16.
; With +avx512vl the checks expect the load folded into vpcmpltud; with only
; +avx512f they expect a separate vmovdqa, a 512-bit compare, and a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16384define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16385; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16386; VLX:       # %bb.0: # %entry
16387; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
16388; VLX-NEXT:    kmovd %k0, %eax
16389; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16390; VLX-NEXT:    vzeroupper
16391; VLX-NEXT:    retq
16392;
16393; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16394; NoVLX:       # %bb.0: # %entry
16395; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16396; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
16397; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16398; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16399; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16400; NoVLX-NEXT:    kmovw %k0, %eax
16401; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16402; NoVLX-NEXT:    vzeroupper
16403; NoVLX-NEXT:    retq
16404entry:
16405  %0 = bitcast <4 x i64> %__a to <8 x i32>
16406  %load = load <4 x i64>, ptr %__b
16407  %1 = bitcast <4 x i64> %load to <8 x i32>
16408  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16409  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16410  %4 = bitcast <16 x i1> %3 to i16
16411  ret i16 %4
16412}
16413
; Masked unsigned less-than on two <8 x i32> register operands. The compare
; result is ANDed with all 8 bits of %__u, widened to a 16-bit mask, and
; returned as i16.
; Both configurations are expected to move %__u into %k1 and apply it as the
; {%k1} write-mask of vpcmpltud; the avx512f-only checks additionally expect
; a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16414define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16415; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16416; VLX:       # %bb.0: # %entry
16417; VLX-NEXT:    kmovd %edi, %k1
16418; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16419; VLX-NEXT:    kmovd %k0, %eax
16420; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16421; VLX-NEXT:    vzeroupper
16422; VLX-NEXT:    retq
16423;
16424; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16425; NoVLX:       # %bb.0: # %entry
16426; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16427; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16428; NoVLX-NEXT:    kmovw %edi, %k1
16429; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16430; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16431; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16432; NoVLX-NEXT:    kmovw %k0, %eax
16433; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16434; NoVLX-NEXT:    vzeroupper
16435; NoVLX-NEXT:    retq
16436entry:
16437  %0 = bitcast <4 x i64> %__a to <8 x i32>
16438  %1 = bitcast <4 x i64> %__b to <8 x i32>
16439  %2 = icmp ult <8 x i32> %0, %1
16440  %3 = bitcast i8 %__u to <8 x i1>
16441  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16442  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16443  %6 = bitcast <16 x i1> %5 to i16
16444  ret i16 %6
16445}
16446
; Masked unsigned less-than where the second operand is a full <4 x i64>
; vector loaded from %__b. The compare result is ANDed with all 8 bits of
; %__u, widened to a 16-bit mask, and returned as i16.
; With +avx512vl the checks expect the load folded into the masked compare;
; with only +avx512f they expect a separate vmovdqa, a masked 512-bit compare,
; and a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16447define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16448; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16449; VLX:       # %bb.0: # %entry
16450; VLX-NEXT:    kmovd %edi, %k1
16451; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16452; VLX-NEXT:    kmovd %k0, %eax
16453; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16454; VLX-NEXT:    vzeroupper
16455; VLX-NEXT:    retq
16456;
16457; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16458; NoVLX:       # %bb.0: # %entry
16459; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16460; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
16461; NoVLX-NEXT:    kmovw %edi, %k1
16462; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16463; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16464; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16465; NoVLX-NEXT:    kmovw %k0, %eax
16466; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16467; NoVLX-NEXT:    vzeroupper
16468; NoVLX-NEXT:    retq
16469entry:
16470  %0 = bitcast <4 x i64> %__a to <8 x i32>
16471  %load = load <4 x i64>, ptr %__b
16472  %1 = bitcast <4 x i64> %load to <8 x i32>
16473  %2 = icmp ult <8 x i32> %0, %1
16474  %3 = bitcast i8 %__u to <8 x i1>
16475  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16476  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16477  %6 = bitcast <16 x i1> %5 to i16
16478  ret i16 %6
16479}
16480
16481
; Unsigned less-than against a broadcast operand: a single i32 is loaded from
; %__b and splatted to all 8 lanes before the compare. The 8 result bits are
; widened to a 16-bit mask and returned as i16.
; The checks expect the splat folded into an embedded-broadcast memory
; operand ({1to8} with +avx512vl, {1to16} with only +avx512f); the latter run
; also expects a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16482define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16483; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16484; VLX:       # %bb.0: # %entry
16485; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
16486; VLX-NEXT:    kmovd %k0, %eax
16487; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16488; VLX-NEXT:    vzeroupper
16489; VLX-NEXT:    retq
16490;
16491; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16492; NoVLX:       # %bb.0: # %entry
16493; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16494; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
16495; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16496; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16497; NoVLX-NEXT:    kmovw %k0, %eax
16498; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16499; NoVLX-NEXT:    vzeroupper
16500; NoVLX-NEXT:    retq
16501entry:
16502  %0 = bitcast <4 x i64> %__a to <8 x i32>
16503  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16504  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16505  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16506  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16507  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16508  %4 = bitcast <16 x i1> %3 to i16
16509  ret i16 %4
16510}
16511
; Masked unsigned less-than against a broadcast operand: a single i32 loaded
; from %__b is splatted to all 8 lanes, compared, and the result is ANDed with
; all 8 bits of %__u (mask is the first AND operand here). The 8 bits are
; widened to a 16-bit mask and returned as i16.
; The checks expect %__u in %k1 as the compare write-mask and the splat folded
; into an embedded-broadcast memory operand; the avx512f-only run also
; expects a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16512define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16513; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16514; VLX:       # %bb.0: # %entry
16515; VLX-NEXT:    kmovd %edi, %k1
16516; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16517; VLX-NEXT:    kmovd %k0, %eax
16518; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16519; VLX-NEXT:    vzeroupper
16520; VLX-NEXT:    retq
16521;
16522; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16523; NoVLX:       # %bb.0: # %entry
16524; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16525; NoVLX-NEXT:    kmovw %edi, %k1
16526; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16527; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16528; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16529; NoVLX-NEXT:    kmovw %k0, %eax
16530; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16531; NoVLX-NEXT:    vzeroupper
16532; NoVLX-NEXT:    retq
16533entry:
16534  %0 = bitcast <4 x i64> %__a to <8 x i32>
16535  %load = load i32, ptr %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
16536  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16537  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16538  %2 = icmp ult <8 x i32> %0, %1
16539  %3 = bitcast i8 %__u to <8 x i1>
16540  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16541  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16542  %6 = bitcast <16 x i1> %5 to i16
16543  ret i16 %6
16544}
16545
16546
; Unsigned less-than (icmp ult) on two <8 x i32> register operands, with the
; 8 result bits widened to a 32-bit mask and returned as i32.
; With +avx512vl the checks expect a 256-bit vpcmpltud writing %k0; with only
; +avx512f they expect a 512-bit compare followed by a kshiftlw/kshiftrw $8
; pair that clears mask bits 8-15.
16547define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16548; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16549; VLX:       # %bb.0: # %entry
16550; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
16551; VLX-NEXT:    kmovd %k0, %eax
16552; VLX-NEXT:    vzeroupper
16553; VLX-NEXT:    retq
16554;
16555; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16556; NoVLX:       # %bb.0: # %entry
16557; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16558; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16559; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16560; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16561; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16562; NoVLX-NEXT:    kmovw %k0, %eax
16563; NoVLX-NEXT:    vzeroupper
16564; NoVLX-NEXT:    retq
16565entry:
16566  %0 = bitcast <4 x i64> %__a to <8 x i32>
16567  %1 = bitcast <4 x i64> %__b to <8 x i32>
16568  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16569  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16570  %4 = bitcast <32 x i1> %3 to i32
16571  ret i32 %4
16572}
16573
; Unsigned less-than where the second operand is a full <4 x i64> vector
; loaded from %__b. The 8 result bits are widened to a 32-bit mask and
; returned as i32.
; With +avx512vl the checks expect the load folded into vpcmpltud; with only
; +avx512f they expect a separate vmovdqa, a 512-bit compare, and a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16574define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16575; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16576; VLX:       # %bb.0: # %entry
16577; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
16578; VLX-NEXT:    kmovd %k0, %eax
16579; VLX-NEXT:    vzeroupper
16580; VLX-NEXT:    retq
16581;
16582; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16583; NoVLX:       # %bb.0: # %entry
16584; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16585; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
16586; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16587; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16588; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16589; NoVLX-NEXT:    kmovw %k0, %eax
16590; NoVLX-NEXT:    vzeroupper
16591; NoVLX-NEXT:    retq
16592entry:
16593  %0 = bitcast <4 x i64> %__a to <8 x i32>
16594  %load = load <4 x i64>, ptr %__b
16595  %1 = bitcast <4 x i64> %load to <8 x i32>
16596  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so every widened bit is zero.
16597  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16598  %4 = bitcast <32 x i1> %3 to i32
16599  ret i32 %4
16600}
16601
; Masked unsigned less-than on two <8 x i32> register operands. The compare
; result is ANDed with all 8 bits of %__u, widened to a 32-bit mask, and
; returned as i32.
; Both configurations are expected to move %__u into %k1 and apply it as the
; {%k1} write-mask of vpcmpltud; the avx512f-only checks additionally expect
; a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16602define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16603; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16604; VLX:       # %bb.0: # %entry
16605; VLX-NEXT:    kmovd %edi, %k1
16606; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16607; VLX-NEXT:    kmovd %k0, %eax
16608; VLX-NEXT:    vzeroupper
16609; VLX-NEXT:    retq
16610;
16611; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16612; NoVLX:       # %bb.0: # %entry
16613; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16614; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16615; NoVLX-NEXT:    kmovw %edi, %k1
16616; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16617; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16618; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16619; NoVLX-NEXT:    kmovw %k0, %eax
16620; NoVLX-NEXT:    vzeroupper
16621; NoVLX-NEXT:    retq
16622entry:
16623  %0 = bitcast <4 x i64> %__a to <8 x i32>
16624  %1 = bitcast <4 x i64> %__b to <8 x i32>
16625  %2 = icmp ult <8 x i32> %0, %1
16626  %3 = bitcast i8 %__u to <8 x i1>
16627  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16628  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16629  %6 = bitcast <32 x i1> %5 to i32
16630  ret i32 %6
16631}
16632
; Masked unsigned less-than where the second operand is a full <4 x i64>
; vector loaded from %__b. The compare result is ANDed with all 8 bits of
; %__u, widened to a 32-bit mask, and returned as i32.
; With +avx512vl the checks expect the load folded into the masked compare;
; with only +avx512f they expect a separate vmovdqa, a masked 512-bit compare,
; and a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
16633define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16634; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16635; VLX:       # %bb.0: # %entry
16636; VLX-NEXT:    kmovd %edi, %k1
16637; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16638; VLX-NEXT:    kmovd %k0, %eax
16639; VLX-NEXT:    vzeroupper
16640; VLX-NEXT:    retq
16641;
16642; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16643; NoVLX:       # %bb.0: # %entry
16644; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16645; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
16646; NoVLX-NEXT:    kmovw %edi, %k1
16647; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16648; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16649; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16650; NoVLX-NEXT:    kmovw %k0, %eax
16651; NoVLX-NEXT:    vzeroupper
16652; NoVLX-NEXT:    retq
16653entry:
16654  %0 = bitcast <4 x i64> %__a to <8 x i32>
16655  %load = load <4 x i64>, ptr %__b
16656  %1 = bitcast <4 x i64> %load to <8 x i32>
16657  %2 = icmp ult <8 x i32> %0, %1
16658  %3 = bitcast i8 %__u to <8 x i1>
16659  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; lanes of the zeroinitializer operand, so every widened bit is zero.
16660  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16661  %6 = bitcast <32 x i1> %5 to i32
16662  ret i32 %6
16663}
16664
16665
; Unmasked unsigned less-than on <8 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all eight lanes. The result is
; padded with zero lanes to <32 x i1> and returned as i32.
; The VLX run expects an embedded-broadcast {1to8} ymm vpcmpltud; the NoVLX
; run expects a {1to16} zmm compare followed by a kshiftlw/kshiftrw $8 pair.
16666define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16667; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16668; VLX:       # %bb.0: # %entry
16669; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
16670; VLX-NEXT:    kmovd %k0, %eax
16671; VLX-NEXT:    vzeroupper
16672; VLX-NEXT:    retq
16673;
16674; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16675; NoVLX:       # %bb.0: # %entry
16676; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16677; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
16678; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16679; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16680; NoVLX-NEXT:    kmovw %k0, %eax
16681; NoVLX-NEXT:    vzeroupper
16682; NoVLX-NEXT:    retq
16683entry:
16684  %0 = bitcast <4 x i64> %__a to <8 x i32>
16685  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16686  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16687  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16688  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-31 are all zero.
16689  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16690  %4 = bitcast <32 x i1> %3 to i32
16691  ret i32 %4
16692}
16693
; Masked unsigned less-than on <8 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all eight lanes. The i8 mask
; %__u is ANDed with the compare result (mask is the first `and` operand
; here), then the result is padded with zero lanes to <32 x i1> and returned
; as i32.
; The VLX run expects a masked {1to8} ymm vpcmpltud; the NoVLX run expects a
; masked {1to16} zmm compare followed by a kshiftlw/kshiftrw $8 pair.
16694define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16695; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16696; VLX:       # %bb.0: # %entry
16697; VLX-NEXT:    kmovd %edi, %k1
16698; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16699; VLX-NEXT:    kmovd %k0, %eax
16700; VLX-NEXT:    vzeroupper
16701; VLX-NEXT:    retq
16702;
16703; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16704; NoVLX:       # %bb.0: # %entry
16705; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16706; NoVLX-NEXT:    kmovw %edi, %k1
16707; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16708; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16709; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16710; NoVLX-NEXT:    kmovw %k0, %eax
16711; NoVLX-NEXT:    vzeroupper
16712; NoVLX-NEXT:    retq
16713entry:
16714  %0 = bitcast <4 x i64> %__a to <8 x i32>
16715  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16716  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16717  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16718  %2 = icmp ult <8 x i32> %0, %1
16719  %3 = bitcast i8 %__u to <8 x i1>
16720  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-31 are all zero.
16721  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16722  %6 = bitcast <32 x i1> %5 to i32
16723  ret i32 %6
16724}
16725
16726
; Unmasked unsigned less-than on <8 x i32> (both operands bitcast from the
; <4 x i64> arguments). The result is padded with zero lanes to <64 x i1> and
; returned as i64.
; The VLX run expects a ymm vpcmpltud read back with kmovq; the NoVLX run
; expects a zmm compare, a kshiftlw/kshiftrw $8 pair and a kmovw into %eax.
16727define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16728; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16729; VLX:       # %bb.0: # %entry
16730; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
16731; VLX-NEXT:    kmovq %k0, %rax
16732; VLX-NEXT:    vzeroupper
16733; VLX-NEXT:    retq
16734;
16735; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16736; NoVLX:       # %bb.0: # %entry
16737; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16738; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16739; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16740; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16741; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16742; NoVLX-NEXT:    kmovw %k0, %eax
16743; NoVLX-NEXT:    vzeroupper
16744; NoVLX-NEXT:    retq
16745entry:
16746  %0 = bitcast <4 x i64> %__a to <8 x i32>
16747  %1 = bitcast <4 x i64> %__b to <8 x i32>
16748  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16749  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16750  %4 = bitcast <64 x i1> %3 to i64
16751  ret i64 %4
16752}
16753
; Unmasked unsigned less-than on <8 x i32> where the right-hand operand is a
; full <4 x i64> vector loaded from %__b. The result is padded with zero lanes
; to <64 x i1> and returned as i64.
; The VLX run expects the load folded into a ymm vpcmpltud and a kmovq; the
; NoVLX run expects a separate vmovdqa into ymm1, a zmm compare and a
; kshiftlw/kshiftrw $8 pair.
16754define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16755; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16756; VLX:       # %bb.0: # %entry
16757; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
16758; VLX-NEXT:    kmovq %k0, %rax
16759; VLX-NEXT:    vzeroupper
16760; VLX-NEXT:    retq
16761;
16762; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16763; NoVLX:       # %bb.0: # %entry
16764; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16765; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
16766; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16767; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16768; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16769; NoVLX-NEXT:    kmovw %k0, %eax
16770; NoVLX-NEXT:    vzeroupper
16771; NoVLX-NEXT:    retq
16772entry:
16773  %0 = bitcast <4 x i64> %__a to <8 x i32>
16774  %load = load <4 x i64>, ptr %__b
16775  %1 = bitcast <4 x i64> %load to <8 x i32>
16776  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16777  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16778  %4 = bitcast <64 x i1> %3 to i64
16779  ret i64 %4
16780}
16781
; Masked unsigned less-than on <8 x i32> (both operands bitcast from the
; <4 x i64> arguments). The compare result is ANDed with the i8 mask %__u,
; padded with zero lanes to <64 x i1> and returned as i64.
; The VLX run expects one masked ymm vpcmpltud read back with kmovq; the NoVLX
; run expects a masked zmm compare, a kshiftlw/kshiftrw $8 pair and a kmovw
; into %eax.
16782define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16783; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16784; VLX:       # %bb.0: # %entry
16785; VLX-NEXT:    kmovd %edi, %k1
16786; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16787; VLX-NEXT:    kmovq %k0, %rax
16788; VLX-NEXT:    vzeroupper
16789; VLX-NEXT:    retq
16790;
16791; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16792; NoVLX:       # %bb.0: # %entry
16793; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16794; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16795; NoVLX-NEXT:    kmovw %edi, %k1
16796; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16797; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16798; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16799; NoVLX-NEXT:    kmovw %k0, %eax
16800; NoVLX-NEXT:    vzeroupper
16801; NoVLX-NEXT:    retq
16802entry:
16803  %0 = bitcast <4 x i64> %__a to <8 x i32>
16804  %1 = bitcast <4 x i64> %__b to <8 x i32>
16805  %2 = icmp ult <8 x i32> %0, %1
16806  %3 = bitcast i8 %__u to <8 x i1>
16807  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16808  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16809  %6 = bitcast <64 x i1> %5 to i64
16810  ret i64 %6
16811}
16812
; Masked unsigned less-than on <8 x i32> where the right-hand operand is a
; full <4 x i64> vector loaded from %__b. The result is ANDed with the i8 mask
; %__u, padded with zero lanes to <64 x i1> and returned as i64.
; The VLX run expects the load folded into a masked ymm vpcmpltud and a kmovq;
; the NoVLX run expects a separate vmovdqa into ymm1, a masked zmm compare and
; a kshiftlw/kshiftrw $8 pair.
16813define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16814; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16815; VLX:       # %bb.0: # %entry
16816; VLX-NEXT:    kmovd %edi, %k1
16817; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16818; VLX-NEXT:    kmovq %k0, %rax
16819; VLX-NEXT:    vzeroupper
16820; VLX-NEXT:    retq
16821;
16822; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16823; NoVLX:       # %bb.0: # %entry
16824; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16825; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
16826; NoVLX-NEXT:    kmovw %edi, %k1
16827; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16828; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16829; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16830; NoVLX-NEXT:    kmovw %k0, %eax
16831; NoVLX-NEXT:    vzeroupper
16832; NoVLX-NEXT:    retq
16833entry:
16834  %0 = bitcast <4 x i64> %__a to <8 x i32>
16835  %load = load <4 x i64>, ptr %__b
16836  %1 = bitcast <4 x i64> %load to <8 x i32>
16837  %2 = icmp ult <8 x i32> %0, %1
16838  %3 = bitcast i8 %__u to <8 x i1>
16839  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16840  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16841  %6 = bitcast <64 x i1> %5 to i64
16842  ret i64 %6
16843}
16844
16845
; Unmasked unsigned less-than on <8 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all eight lanes. The result is
; padded with zero lanes to <64 x i1> and returned as i64.
; The VLX run expects an embedded-broadcast {1to8} ymm vpcmpltud and a kmovq;
; the NoVLX run expects a {1to16} zmm compare followed by a
; kshiftlw/kshiftrw $8 pair.
16846define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16847; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16848; VLX:       # %bb.0: # %entry
16849; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
16850; VLX-NEXT:    kmovq %k0, %rax
16851; VLX-NEXT:    vzeroupper
16852; VLX-NEXT:    retq
16853;
16854; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16855; NoVLX:       # %bb.0: # %entry
16856; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16857; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
16858; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16859; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16860; NoVLX-NEXT:    kmovw %k0, %eax
16861; NoVLX-NEXT:    vzeroupper
16862; NoVLX-NEXT:    retq
16863entry:
16864  %0 = bitcast <4 x i64> %__a to <8 x i32>
16865  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16866  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16867  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16868  %2 = icmp ult <8 x i32> %0, %1
  ; Shuffle indices 0-7 select the compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16869  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16870  %4 = bitcast <64 x i1> %3 to i64
16871  ret i64 %4
16872}
16873
; Masked unsigned less-than on <8 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all eight lanes. The i8 mask
; %__u is ANDed with the compare result (mask is the first `and` operand
; here), then the result is padded with zero lanes to <64 x i1> and returned
; as i64.
; The VLX run expects a masked {1to8} ymm vpcmpltud and a kmovq; the NoVLX run
; expects a masked {1to16} zmm compare followed by a kshiftlw/kshiftrw $8 pair.
16874define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16875; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16876; VLX:       # %bb.0: # %entry
16877; VLX-NEXT:    kmovd %edi, %k1
16878; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16879; VLX-NEXT:    kmovq %k0, %rax
16880; VLX-NEXT:    vzeroupper
16881; VLX-NEXT:    retq
16882;
16883; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16884; NoVLX:       # %bb.0: # %entry
16885; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16886; NoVLX-NEXT:    kmovw %edi, %k1
16887; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16888; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16889; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16890; NoVLX-NEXT:    kmovw %k0, %eax
16891; NoVLX-NEXT:    vzeroupper
16892; NoVLX-NEXT:    retq
16893entry:
16894  %0 = bitcast <4 x i64> %__a to <8 x i32>
16895  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16896  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16897  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16898  %2 = icmp ult <8 x i32> %0, %1
16899  %3 = bitcast i8 %__u to <8 x i1>
16900  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 select the masked compare lanes; indices 8-15 select
  ; zeroinitializer lanes, so result lanes 8-63 are all zero.
16901  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16902  %6 = bitcast <64 x i1> %5 to i64
16903  ret i64 %6
16904}
16905
16906
; Unmasked unsigned less-than on <16 x i32> (both operands bitcast from the
; <8 x i64> arguments). The result is padded with zero lanes to <32 x i1> and
; returned as i32.
; Both runs expect a single zmm vpcmpltud; the VLX run reads the mask back
; with kmovd and the NoVLX run with kmovw.
16907define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16908; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16909; VLX:       # %bb.0: # %entry
16910; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16911; VLX-NEXT:    kmovd %k0, %eax
16912; VLX-NEXT:    vzeroupper
16913; VLX-NEXT:    retq
16914;
16915; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16916; NoVLX:       # %bb.0: # %entry
16917; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16918; NoVLX-NEXT:    kmovw %k0, %eax
16919; NoVLX-NEXT:    vzeroupper
16920; NoVLX-NEXT:    retq
16921entry:
16922  %0 = bitcast <8 x i64> %__a to <16 x i32>
16923  %1 = bitcast <8 x i64> %__b to <16 x i32>
16924  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
16925  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16926  %4 = bitcast <32 x i1> %3 to i32
16927  ret i32 %4
16928}
16929
; Unmasked unsigned less-than on <16 x i32> where the right-hand operand is a
; full <8 x i64> vector loaded from %__b. The result is padded with zero lanes
; to <32 x i1> and returned as i32.
; Both runs expect the load folded into a zmm vpcmpltud; the VLX run reads the
; mask back with kmovd and the NoVLX run with kmovw.
16930define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
16931; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16932; VLX:       # %bb.0: # %entry
16933; VLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
16934; VLX-NEXT:    kmovd %k0, %eax
16935; VLX-NEXT:    vzeroupper
16936; VLX-NEXT:    retq
16937;
16938; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16939; NoVLX:       # %bb.0: # %entry
16940; NoVLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
16941; NoVLX-NEXT:    kmovw %k0, %eax
16942; NoVLX-NEXT:    vzeroupper
16943; NoVLX-NEXT:    retq
16944entry:
16945  %0 = bitcast <8 x i64> %__a to <16 x i32>
16946  %load = load <8 x i64>, ptr %__b
16947  %1 = bitcast <8 x i64> %load to <16 x i32>
16948  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
16949  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16950  %4 = bitcast <32 x i1> %3 to i32
16951  ret i32 %4
16952}
16953
; Masked unsigned less-than on <16 x i32> (both operands bitcast from the
; <8 x i64> arguments). The compare result is ANDed with the i16 mask %__u,
; padded with zero lanes to <32 x i1> and returned as i32.
; The VLX run expects the mask applied through {%k1} on the zmm vpcmpltud; the
; NoVLX run expects an unmasked compare with the mask applied afterwards by
; `andl %edi, %eax`.
16954define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16955; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16956; VLX:       # %bb.0: # %entry
16957; VLX-NEXT:    kmovd %edi, %k1
16958; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16959; VLX-NEXT:    kmovd %k0, %eax
16960; VLX-NEXT:    vzeroupper
16961; VLX-NEXT:    retq
16962;
16963; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16964; NoVLX:       # %bb.0: # %entry
16965; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16966; NoVLX-NEXT:    kmovw %k0, %eax
16967; NoVLX-NEXT:    andl %edi, %eax
16968; NoVLX-NEXT:    vzeroupper
16969; NoVLX-NEXT:    retq
16970entry:
16971  %0 = bitcast <8 x i64> %__a to <16 x i32>
16972  %1 = bitcast <8 x i64> %__b to <16 x i32>
16973  %2 = icmp ult <16 x i32> %0, %1
16974  %3 = bitcast i16 %__u to <16 x i1>
16975  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
16976  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16977  %6 = bitcast <32 x i1> %5 to i32
16978  ret i32 %6
16979}
16980
; Masked unsigned less-than on <16 x i32> where the right-hand operand is a
; full <8 x i64> vector loaded from %__b. The result is ANDed with the i16
; mask %__u, padded with zero lanes to <32 x i1> and returned as i32.
; The VLX run expects the load folded into a {%k1}-masked zmm vpcmpltud; the
; NoVLX run expects an unmasked folded-load compare with the mask applied
; afterwards by `andl %edi, %eax`.
16981define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
16982; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16983; VLX:       # %bb.0: # %entry
16984; VLX-NEXT:    kmovd %edi, %k1
16985; VLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0 {%k1}
16986; VLX-NEXT:    kmovd %k0, %eax
16987; VLX-NEXT:    vzeroupper
16988; VLX-NEXT:    retq
16989;
16990; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16991; NoVLX:       # %bb.0: # %entry
16992; NoVLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0
16993; NoVLX-NEXT:    kmovw %k0, %eax
16994; NoVLX-NEXT:    andl %edi, %eax
16995; NoVLX-NEXT:    vzeroupper
16996; NoVLX-NEXT:    retq
16997entry:
16998  %0 = bitcast <8 x i64> %__a to <16 x i32>
16999  %load = load <8 x i64>, ptr %__b
17000  %1 = bitcast <8 x i64> %load to <16 x i32>
17001  %2 = icmp ult <16 x i32> %0, %1
17002  %3 = bitcast i16 %__u to <16 x i1>
17003  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
17004  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17005  %6 = bitcast <32 x i1> %5 to i32
17006  ret i32 %6
17007}
17008
17009
; Unmasked unsigned less-than on <16 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all sixteen lanes. The result is
; padded with zero lanes to <32 x i1> and returned as i32.
; Both runs expect an embedded-broadcast {1to16} zmm vpcmpltud; the VLX run
; reads the mask back with kmovd and the NoVLX run with kmovw.
17010define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17011; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17012; VLX:       # %bb.0: # %entry
17013; VLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17014; VLX-NEXT:    kmovd %k0, %eax
17015; VLX-NEXT:    vzeroupper
17016; VLX-NEXT:    retq
17017;
17018; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17019; NoVLX:       # %bb.0: # %entry
17020; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17021; NoVLX-NEXT:    kmovw %k0, %eax
17022; NoVLX-NEXT:    vzeroupper
17023; NoVLX-NEXT:    retq
17024entry:
17025  %0 = bitcast <8 x i64> %__a to <16 x i32>
17026  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
17027  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17028  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17029  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
17030  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17031  %4 = bitcast <32 x i1> %3 to i32
17032  ret i32 %4
17033}
17034
; Masked unsigned less-than on <16 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all sixteen lanes. The i16 mask
; %__u is ANDed with the compare result (mask is the first `and` operand
; here), then the result is padded with zero lanes to <32 x i1> and returned
; as i32.
; The VLX run expects a {%k1}-masked {1to16} zmm vpcmpltud; the NoVLX run
; expects an unmasked broadcast compare with the mask applied afterwards by
; `andl %edi, %eax`.
17035define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17036; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17037; VLX:       # %bb.0: # %entry
17038; VLX-NEXT:    kmovd %edi, %k1
17039; VLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17040; VLX-NEXT:    kmovd %k0, %eax
17041; VLX-NEXT:    vzeroupper
17042; VLX-NEXT:    retq
17043;
17044; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17045; NoVLX:       # %bb.0: # %entry
17046; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0
17047; NoVLX-NEXT:    kmovw %k0, %eax
17048; NoVLX-NEXT:    andl %edi, %eax
17049; NoVLX-NEXT:    vzeroupper
17050; NoVLX-NEXT:    retq
17051entry:
17052  %0 = bitcast <8 x i64> %__a to <16 x i32>
17053  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
17054  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17055  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17056  %2 = icmp ult <16 x i32> %0, %1
17057  %3 = bitcast i16 %__u to <16 x i1>
17058  %4 = and <16 x i1> %3, %2
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-31 are all zero.
17059  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17060  %6 = bitcast <32 x i1> %5 to i32
17061  ret i32 %6
17062}
17063
17064
; Unmasked unsigned less-than on <16 x i32> (both operands bitcast from the
; <8 x i64> arguments). The result is padded with zero lanes to <64 x i1> and
; returned as i64.
; Both runs expect a single zmm vpcmpltud; the VLX run reads the mask back
; with kmovq into %rax and the NoVLX run with kmovw into %eax.
17065define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17066; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17067; VLX:       # %bb.0: # %entry
17068; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17069; VLX-NEXT:    kmovq %k0, %rax
17070; VLX-NEXT:    vzeroupper
17071; VLX-NEXT:    retq
17072;
17073; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17074; NoVLX:       # %bb.0: # %entry
17075; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17076; NoVLX-NEXT:    kmovw %k0, %eax
17077; NoVLX-NEXT:    vzeroupper
17078; NoVLX-NEXT:    retq
17079entry:
17080  %0 = bitcast <8 x i64> %__a to <16 x i32>
17081  %1 = bitcast <8 x i64> %__b to <16 x i32>
17082  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17083  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17084  %4 = bitcast <64 x i1> %3 to i64
17085  ret i64 %4
17086}
17087
; Unmasked unsigned less-than on <16 x i32> where the right-hand operand is a
; full <8 x i64> vector loaded from %__b. The result is padded with zero lanes
; to <64 x i1> and returned as i64.
; Both runs expect the load folded into a zmm vpcmpltud; the VLX run reads the
; mask back with kmovq into %rax and the NoVLX run with kmovw into %eax.
17088define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17089; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17090; VLX:       # %bb.0: # %entry
17091; VLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17092; VLX-NEXT:    kmovq %k0, %rax
17093; VLX-NEXT:    vzeroupper
17094; VLX-NEXT:    retq
17095;
17096; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17097; NoVLX:       # %bb.0: # %entry
17098; NoVLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17099; NoVLX-NEXT:    kmovw %k0, %eax
17100; NoVLX-NEXT:    vzeroupper
17101; NoVLX-NEXT:    retq
17102entry:
17103  %0 = bitcast <8 x i64> %__a to <16 x i32>
17104  %load = load <8 x i64>, ptr %__b
17105  %1 = bitcast <8 x i64> %load to <16 x i32>
17106  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17107  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17108  %4 = bitcast <64 x i1> %3 to i64
17109  ret i64 %4
17110}
17111
; Masked unsigned less-than on <16 x i32> (both operands bitcast from the
; <8 x i64> arguments). The compare result is ANDed with the i16 mask %__u,
; padded with zero lanes to <64 x i1> and returned as i64.
; The VLX run expects the mask applied through {%k1} on the zmm vpcmpltud and
; a kmovq; the NoVLX run expects an unmasked compare with the mask applied
; afterwards by `andl %edi, %eax`.
17112define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17113; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17114; VLX:       # %bb.0: # %entry
17115; VLX-NEXT:    kmovd %edi, %k1
17116; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17117; VLX-NEXT:    kmovq %k0, %rax
17118; VLX-NEXT:    vzeroupper
17119; VLX-NEXT:    retq
17120;
17121; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17122; NoVLX:       # %bb.0: # %entry
17123; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17124; NoVLX-NEXT:    kmovw %k0, %eax
17125; NoVLX-NEXT:    andl %edi, %eax
17126; NoVLX-NEXT:    vzeroupper
17127; NoVLX-NEXT:    retq
17128entry:
17129  %0 = bitcast <8 x i64> %__a to <16 x i32>
17130  %1 = bitcast <8 x i64> %__b to <16 x i32>
17131  %2 = icmp ult <16 x i32> %0, %1
17132  %3 = bitcast i16 %__u to <16 x i1>
17133  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17134  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17135  %6 = bitcast <64 x i1> %5 to i64
17136  ret i64 %6
17137}
17138
; Masked unsigned less-than on <16 x i32> where the right-hand operand is a
; full <8 x i64> vector loaded from %__b. The result is ANDed with the i16
; mask %__u, padded with zero lanes to <64 x i1> and returned as i64.
; The VLX run expects the load folded into a {%k1}-masked zmm vpcmpltud and a
; kmovq; the NoVLX run expects an unmasked folded-load compare with the mask
; applied afterwards by `andl %edi, %eax`.
17139define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17140; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17141; VLX:       # %bb.0: # %entry
17142; VLX-NEXT:    kmovd %edi, %k1
17143; VLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17144; VLX-NEXT:    kmovq %k0, %rax
17145; VLX-NEXT:    vzeroupper
17146; VLX-NEXT:    retq
17147;
17148; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17149; NoVLX:       # %bb.0: # %entry
17150; NoVLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0
17151; NoVLX-NEXT:    kmovw %k0, %eax
17152; NoVLX-NEXT:    andl %edi, %eax
17153; NoVLX-NEXT:    vzeroupper
17154; NoVLX-NEXT:    retq
17155entry:
17156  %0 = bitcast <8 x i64> %__a to <16 x i32>
17157  %load = load <8 x i64>, ptr %__b
17158  %1 = bitcast <8 x i64> %load to <16 x i32>
17159  %2 = icmp ult <16 x i32> %0, %1
17160  %3 = bitcast i16 %__u to <16 x i1>
17161  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17162  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17163  %6 = bitcast <64 x i1> %5 to i64
17164  ret i64 %6
17165}
17166
17167
; Unmasked unsigned less-than on <16 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all sixteen lanes. The result is
; padded with zero lanes to <64 x i1> and returned as i64.
; Both runs expect an embedded-broadcast {1to16} zmm vpcmpltud; the VLX run
; reads the mask back with kmovq into %rax and the NoVLX run with kmovw into
; %eax.
17168define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17169; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17170; VLX:       # %bb.0: # %entry
17171; VLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17172; VLX-NEXT:    kmovq %k0, %rax
17173; VLX-NEXT:    vzeroupper
17174; VLX-NEXT:    retq
17175;
17176; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17177; NoVLX:       # %bb.0: # %entry
17178; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17179; NoVLX-NEXT:    kmovw %k0, %eax
17180; NoVLX-NEXT:    vzeroupper
17181; NoVLX-NEXT:    retq
17182entry:
17183  %0 = bitcast <8 x i64> %__a to <16 x i32>
17184  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
17185  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17186  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17187  %2 = icmp ult <16 x i32> %0, %1
  ; Shuffle indices 0-15 select the compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17188  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17189  %4 = bitcast <64 x i1> %3 to i64
17190  ret i64 %4
17191}
17192
; Masked unsigned less-than on <16 x i32> where the right-hand operand is a
; single i32 loaded from %__b and splatted to all sixteen lanes. The i16 mask
; %__u is ANDed with the compare result (mask is the first `and` operand
; here), then the result is padded with zero lanes to <64 x i1> and returned
; as i64.
; The VLX run expects a {%k1}-masked {1to16} zmm vpcmpltud and a kmovq; the
; NoVLX run expects an unmasked broadcast compare with the mask applied
; afterwards by `andl %edi, %eax`.
17193define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17194; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17195; VLX:       # %bb.0: # %entry
17196; VLX-NEXT:    kmovd %edi, %k1
17197; VLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17198; VLX-NEXT:    kmovq %k0, %rax
17199; VLX-NEXT:    vzeroupper
17200; VLX-NEXT:    retq
17201;
17202; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17203; NoVLX:       # %bb.0: # %entry
17204; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0
17205; NoVLX-NEXT:    kmovw %k0, %eax
17206; NoVLX-NEXT:    andl %edi, %eax
17207; NoVLX-NEXT:    vzeroupper
17208; NoVLX-NEXT:    retq
17209entry:
17210  %0 = bitcast <8 x i64> %__a to <16 x i32>
17211  %load = load i32, ptr %__b
  ; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
17212  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17213  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17214  %2 = icmp ult <16 x i32> %0, %1
17215  %3 = bitcast i16 %__u to <16 x i1>
17216  %4 = and <16 x i1> %3, %2
  ; Shuffle indices 0-15 select the masked compare lanes; indices 16-31 select
  ; zeroinitializer lanes, so result lanes 16-63 are all zero.
17217  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17218  %6 = bitcast <64 x i1> %5 to i64
17219  ret i64 %6
17220}
17221
17222
; Unmasked unsigned less-than on <2 x i64>. The two bitcasts are identity
; casts (<2 x i64> to <2 x i64>). The result is padded with zero lanes to
; <4 x i1> and returned as i4.
; The VLX run expects an xmm vpcmpltuq read back with kmovb; the NoVLX run
; expects the operands widened to zmm, a zmm vpcmpltuq, and `andl $3` applied
; to the kmovw result.
17223define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17224; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17225; VLX:       # %bb.0: # %entry
17226; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17227; VLX-NEXT:    kmovb %k0, %eax
17228; VLX-NEXT:    retq
17229;
17230; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17231; NoVLX:       # %bb.0: # %entry
17232; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17233; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17234; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17235; NoVLX-NEXT:    kmovw %k0, %eax
17236; NoVLX-NEXT:    andl $3, %eax
17237; NoVLX-NEXT:    vzeroupper
17238; NoVLX-NEXT:    retq
17239entry:
17240  %0 = bitcast <2 x i64> %__a to <2 x i64>
17241  %1 = bitcast <2 x i64> %__b to <2 x i64>
17242  %2 = icmp ult <2 x i64> %0, %1
  ; Shuffle indices 0-1 select the compare lanes; indices 2-3 select
  ; zeroinitializer lanes, so result lanes 2-3 are zero.
17243  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17244  %4 = bitcast <4 x i1> %3 to i4
17245  ret i4 %4
17246}
17247
; Memory form: the right-hand operand is a full <2 x i64> load from %__b.
; The compare result is zero-padded to <4 x i1> and returned as an i4 bitmask.
; The VLX checks expect the load folded into vpcmpltuq; the NoVLX checks
; expect a separate vmovdqa into xmm1, a zmm compare, and `andl $3`.
17248define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17249; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17250; VLX:       # %bb.0: # %entry
17251; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17252; VLX-NEXT:    kmovb %k0, %eax
17253; VLX-NEXT:    retq
17254;
17255; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17256; NoVLX:       # %bb.0: # %entry
17257; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17258; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17259; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17260; NoVLX-NEXT:    kmovw %k0, %eax
17261; NoVLX-NEXT:    andl $3, %eax
17262; NoVLX-NEXT:    vzeroupper
17263; NoVLX-NEXT:    retq
17264entry:
17265  %0 = bitcast <2 x i64> %__a to <2 x i64>
17266  %load = load <2 x i64>, ptr %__b
17267  %1 = bitcast <2 x i64> %load to <2 x i64>
17268  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17269  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17270  %4 = bitcast <4 x i1> %3 to i4
17271  ret i4 %4
17272}
17273
; Masked register/register form: icmp ult on two <2 x i64> arguments, ANDed
; with the low two bits of %__u, zero-padded to <4 x i1> and returned as i4.
; Both check variants expect %edi moved into %k1 and used as the {%k1}
; write mask of vpcmpltuq; the NoVLX checks additionally expect zmm operands
; and a trailing `andl $3`.
17274define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17275; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17276; VLX:       # %bb.0: # %entry
17277; VLX-NEXT:    kmovd %edi, %k1
17278; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17279; VLX-NEXT:    kmovb %k0, %eax
17280; VLX-NEXT:    retq
17281;
17282; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17283; NoVLX:       # %bb.0: # %entry
17284; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17285; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17286; NoVLX-NEXT:    kmovw %edi, %k1
17287; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17288; NoVLX-NEXT:    kmovw %k0, %eax
17289; NoVLX-NEXT:    andl $3, %eax
17290; NoVLX-NEXT:    vzeroupper
17291; NoVLX-NEXT:    retq
17292entry:
17293  %0 = bitcast <2 x i64> %__a to <2 x i64>
17294  %1 = bitcast <2 x i64> %__b to <2 x i64>
17295  %2 = icmp ult <2 x i64> %0, %1
17296  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17297  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17298  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17299  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17300  %6 = bitcast <4 x i1> %5 to i4
17301  ret i4 %6
17302}
17303
; Masked memory form: %__a is compared (icmp ult) against a <2 x i64> loaded
; from %__b, the result is ANDed with the low two bits of %__u, zero-padded
; to <4 x i1> and returned as i4.
; The VLX checks expect the load folded into a {%k1}-masked vpcmpltuq; the
; NoVLX checks expect vmovdqa into xmm1, a masked zmm compare, and `andl $3`.
17304define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17305; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17306; VLX:       # %bb.0: # %entry
17307; VLX-NEXT:    kmovd %edi, %k1
17308; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17309; VLX-NEXT:    kmovb %k0, %eax
17310; VLX-NEXT:    retq
17311;
17312; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17313; NoVLX:       # %bb.0: # %entry
17314; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17315; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17316; NoVLX-NEXT:    kmovw %edi, %k1
17317; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17318; NoVLX-NEXT:    kmovw %k0, %eax
17319; NoVLX-NEXT:    andl $3, %eax
17320; NoVLX-NEXT:    vzeroupper
17321; NoVLX-NEXT:    retq
17322entry:
17323  %0 = bitcast <2 x i64> %__a to <2 x i64>
17324  %load = load <2 x i64>, ptr %__b
17325  %1 = bitcast <2 x i64> %load to <2 x i64>
17326  %2 = icmp ult <2 x i64> %0, %1
17327  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17328  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17329  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17330  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17331  %6 = bitcast <4 x i1> %5 to i4
17332  ret i4 %6
17333}
17334
17335
; Broadcast form: a scalar i64 is loaded from %__b and splatted to both lanes
; before the icmp ult against %__a. The result is zero-padded to <4 x i1> and
; returned as i4.
; The checks expect an embedded-broadcast memory operand on vpcmpltuq:
; {1to2} with xmm for the VLX run and {1to8} with zmm for the NoVLX run,
; the latter followed by `andl $3`.
17336define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17337; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17338; VLX:       # %bb.0: # %entry
17339; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17340; VLX-NEXT:    kmovb %k0, %eax
17341; VLX-NEXT:    retq
17342;
17343; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17344; NoVLX:       # %bb.0: # %entry
17345; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17346; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17347; NoVLX-NEXT:    kmovw %k0, %eax
17348; NoVLX-NEXT:    andl $3, %eax
17349; NoVLX-NEXT:    vzeroupper
17350; NoVLX-NEXT:    retq
17351entry:
17352  %0 = bitcast <2 x i64> %__a to <2 x i64>
17353  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17354  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17355  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17356  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17357  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17358  %4 = bitcast <4 x i1> %3 to i4
17359  ret i4 %4
17360}
17361
; Masked broadcast form: a scalar i64 loaded from %__b is splatted to both
; lanes, compared (icmp ult) against %__a, ANDed with the low two bits of
; %__u, zero-padded to <4 x i1> and returned as i4.
; The checks expect a {%k1}-masked vpcmpltuq with an embedded-broadcast
; operand: {1to2} for the VLX run, {1to8} plus `andl $3` for the NoVLX run.
17362define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17363; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17364; VLX:       # %bb.0: # %entry
17365; VLX-NEXT:    kmovd %edi, %k1
17366; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17367; VLX-NEXT:    kmovb %k0, %eax
17368; VLX-NEXT:    retq
17369;
17370; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17371; NoVLX:       # %bb.0: # %entry
17372; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17373; NoVLX-NEXT:    kmovw %edi, %k1
17374; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17375; NoVLX-NEXT:    kmovw %k0, %eax
17376; NoVLX-NEXT:    andl $3, %eax
17377; NoVLX-NEXT:    vzeroupper
17378; NoVLX-NEXT:    retq
17379entry:
17380  %0 = bitcast <2 x i64> %__a to <2 x i64>
17381  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17382  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17383  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17384  %2 = icmp ult <2 x i64> %0, %1
17385  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17386  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17387  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17388  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17389  %6 = bitcast <4 x i1> %5 to i4
17390  ret i4 %6
17391}
17392
17393
; Register/register form: icmp ult on two <2 x i64> arguments, with the
; <2 x i1> result zero-padded to <8 x i1> and returned as an i8 bitmask.
; The VLX checks expect a single xmm vpcmpltuq into %k0; the NoVLX checks
; expect a zmm compare followed by kshiftlw/kshiftrw by 14, which leaves
; only the low two bits of the 16-bit mask.
17394define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17395; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17396; VLX:       # %bb.0: # %entry
17397; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17398; VLX-NEXT:    kmovd %k0, %eax
17399; VLX-NEXT:    # kill: def $al killed $al killed $eax
17400; VLX-NEXT:    retq
17401;
17402; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17403; NoVLX:       # %bb.0: # %entry
17404; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17405; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17406; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17407; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17408; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17409; NoVLX-NEXT:    kmovw %k0, %eax
17410; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17411; NoVLX-NEXT:    vzeroupper
17412; NoVLX-NEXT:    retq
17413entry:
17414  %0 = bitcast <2 x i64> %__a to <2 x i64>
17415  %1 = bitcast <2 x i64> %__b to <2 x i64>
17416  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17417  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17418  %4 = bitcast <8 x i1> %3 to i8
17419  ret i8 %4
17420}
17421
; Memory form: the right-hand operand is a full <2 x i64> load from %__b.
; The compare result is zero-padded to <8 x i1> and returned as an i8 bitmask.
; The VLX checks expect the load folded into vpcmpltuq; the NoVLX checks
; expect vmovdqa into xmm1, a zmm compare, and kshiftlw/kshiftrw by 14 to
; keep only the low two mask bits.
17422define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17423; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17424; VLX:       # %bb.0: # %entry
17425; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17426; VLX-NEXT:    kmovd %k0, %eax
17427; VLX-NEXT:    # kill: def $al killed $al killed $eax
17428; VLX-NEXT:    retq
17429;
17430; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17431; NoVLX:       # %bb.0: # %entry
17432; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17433; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17434; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17435; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17436; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17437; NoVLX-NEXT:    kmovw %k0, %eax
17438; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17439; NoVLX-NEXT:    vzeroupper
17440; NoVLX-NEXT:    retq
17441entry:
17442  %0 = bitcast <2 x i64> %__a to <2 x i64>
17443  %load = load <2 x i64>, ptr %__b
17444  %1 = bitcast <2 x i64> %load to <2 x i64>
17445  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17446  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17447  %4 = bitcast <8 x i1> %3 to i8
17448  ret i8 %4
17449}
17450
; Masked register/register form: icmp ult on two <2 x i64> arguments, ANDed
; with the low two bits of %__u, zero-padded to <8 x i1> and returned as i8.
; Both check variants expect %edi moved into %k1 and used as the {%k1}
; write mask of vpcmpltuq; the NoVLX checks additionally expect zmm operands
; and kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17451define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17452; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17453; VLX:       # %bb.0: # %entry
17454; VLX-NEXT:    kmovd %edi, %k1
17455; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17456; VLX-NEXT:    kmovd %k0, %eax
17457; VLX-NEXT:    # kill: def $al killed $al killed $eax
17458; VLX-NEXT:    retq
17459;
17460; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17461; NoVLX:       # %bb.0: # %entry
17462; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17463; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17464; NoVLX-NEXT:    kmovw %edi, %k1
17465; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17466; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17467; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17468; NoVLX-NEXT:    kmovw %k0, %eax
17469; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17470; NoVLX-NEXT:    vzeroupper
17471; NoVLX-NEXT:    retq
17472entry:
17473  %0 = bitcast <2 x i64> %__a to <2 x i64>
17474  %1 = bitcast <2 x i64> %__b to <2 x i64>
17475  %2 = icmp ult <2 x i64> %0, %1
17476  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17477  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17478  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17479  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17480  %6 = bitcast <8 x i1> %5 to i8
17481  ret i8 %6
17482}
17483
; Masked memory form: %__a is compared (icmp ult) against a <2 x i64> loaded
; from %__b, the result is ANDed with the low two bits of %__u, zero-padded
; to <8 x i1> and returned as i8.
; The VLX checks expect the load folded into a {%k1}-masked vpcmpltuq; the
; NoVLX checks expect vmovdqa into xmm1, a masked zmm compare, and
; kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17484define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17485; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17486; VLX:       # %bb.0: # %entry
17487; VLX-NEXT:    kmovd %edi, %k1
17488; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17489; VLX-NEXT:    kmovd %k0, %eax
17490; VLX-NEXT:    # kill: def $al killed $al killed $eax
17491; VLX-NEXT:    retq
17492;
17493; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17494; NoVLX:       # %bb.0: # %entry
17495; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17496; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17497; NoVLX-NEXT:    kmovw %edi, %k1
17498; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17499; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17500; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17501; NoVLX-NEXT:    kmovw %k0, %eax
17502; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17503; NoVLX-NEXT:    vzeroupper
17504; NoVLX-NEXT:    retq
17505entry:
17506  %0 = bitcast <2 x i64> %__a to <2 x i64>
17507  %load = load <2 x i64>, ptr %__b
17508  %1 = bitcast <2 x i64> %load to <2 x i64>
17509  %2 = icmp ult <2 x i64> %0, %1
17510  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17511  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17512  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17513  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17514  %6 = bitcast <8 x i1> %5 to i8
17515  ret i8 %6
17516}
17517
17518
; Broadcast form: a scalar i64 is loaded from %__b and splatted to both lanes
; before the icmp ult against %__a. The result is zero-padded to <8 x i1> and
; returned as i8.
; The checks expect an embedded-broadcast memory operand on vpcmpltuq:
; {1to2} with xmm for the VLX run and {1to8} with zmm for the NoVLX run,
; the latter followed by kshiftlw/kshiftrw by 14.
17519define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17520; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17521; VLX:       # %bb.0: # %entry
17522; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17523; VLX-NEXT:    kmovd %k0, %eax
17524; VLX-NEXT:    # kill: def $al killed $al killed $eax
17525; VLX-NEXT:    retq
17526;
17527; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17528; NoVLX:       # %bb.0: # %entry
17529; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17530; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17531; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17532; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17533; NoVLX-NEXT:    kmovw %k0, %eax
17534; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17535; NoVLX-NEXT:    vzeroupper
17536; NoVLX-NEXT:    retq
17537entry:
17538  %0 = bitcast <2 x i64> %__a to <2 x i64>
17539  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17540  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17541  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17542  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17543  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17544  %4 = bitcast <8 x i1> %3 to i8
17545  ret i8 %4
17546}
17547
; Masked broadcast form: a scalar i64 loaded from %__b is splatted to both
; lanes, compared (icmp ult) against %__a, ANDed with the low two bits of
; %__u, zero-padded to <8 x i1> and returned as i8.
; The checks expect a {%k1}-masked vpcmpltuq with an embedded-broadcast
; operand: {1to2} for the VLX run, {1to8} plus kshiftlw/kshiftrw by 14 for
; the NoVLX run.
17548define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17549; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17550; VLX:       # %bb.0: # %entry
17551; VLX-NEXT:    kmovd %edi, %k1
17552; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17553; VLX-NEXT:    kmovd %k0, %eax
17554; VLX-NEXT:    # kill: def $al killed $al killed $eax
17555; VLX-NEXT:    retq
17556;
17557; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17558; NoVLX:       # %bb.0: # %entry
17559; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17560; NoVLX-NEXT:    kmovw %edi, %k1
17561; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17562; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17563; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17564; NoVLX-NEXT:    kmovw %k0, %eax
17565; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17566; NoVLX-NEXT:    vzeroupper
17567; NoVLX-NEXT:    retq
17568entry:
17569  %0 = bitcast <2 x i64> %__a to <2 x i64>
17570  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17571  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17572  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17573  %2 = icmp ult <2 x i64> %0, %1
17574  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17575  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17576  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17577  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17578  %6 = bitcast <8 x i1> %5 to i8
17579  ret i8 %6
17580}
17581
17582
; Register/register form: icmp ult on two <2 x i64> arguments, with the
; <2 x i1> result zero-padded to <16 x i1> and returned as an i16 bitmask.
; The VLX checks expect a single xmm vpcmpltuq into %k0; the NoVLX checks
; expect a zmm compare followed by kshiftlw/kshiftrw by 14, which leaves
; only the low two bits of the 16-bit mask.
17583define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17584; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17585; VLX:       # %bb.0: # %entry
17586; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17587; VLX-NEXT:    kmovd %k0, %eax
17588; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17589; VLX-NEXT:    retq
17590;
17591; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17592; NoVLX:       # %bb.0: # %entry
17593; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17594; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17595; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17596; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17597; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17598; NoVLX-NEXT:    kmovw %k0, %eax
17599; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17600; NoVLX-NEXT:    vzeroupper
17601; NoVLX-NEXT:    retq
17602entry:
17603  %0 = bitcast <2 x i64> %__a to <2 x i64>
17604  %1 = bitcast <2 x i64> %__b to <2 x i64>
17605  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17606  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17607  %4 = bitcast <16 x i1> %3 to i16
17608  ret i16 %4
17609}
17610
; Memory form: the right-hand operand is a full <2 x i64> load from %__b.
; The compare result is zero-padded to <16 x i1> and returned as an i16 bitmask.
; The VLX checks expect the load folded into vpcmpltuq; the NoVLX checks
; expect vmovdqa into xmm1, a zmm compare, and kshiftlw/kshiftrw by 14 to
; keep only the low two mask bits.
17611define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17612; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17613; VLX:       # %bb.0: # %entry
17614; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17615; VLX-NEXT:    kmovd %k0, %eax
17616; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17617; VLX-NEXT:    retq
17618;
17619; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17620; NoVLX:       # %bb.0: # %entry
17621; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17622; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17623; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17624; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17625; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17626; NoVLX-NEXT:    kmovw %k0, %eax
17627; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17628; NoVLX-NEXT:    vzeroupper
17629; NoVLX-NEXT:    retq
17630entry:
17631  %0 = bitcast <2 x i64> %__a to <2 x i64>
17632  %load = load <2 x i64>, ptr %__b
17633  %1 = bitcast <2 x i64> %load to <2 x i64>
17634  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17635  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17636  %4 = bitcast <16 x i1> %3 to i16
17637  ret i16 %4
17638}
17639
; Masked register/register form: icmp ult on two <2 x i64> arguments, ANDed
; with the low two bits of %__u, zero-padded to <16 x i1> and returned as i16.
; Both check variants expect %edi moved into %k1 and used as the {%k1}
; write mask of vpcmpltuq; the NoVLX checks additionally expect zmm operands
; and kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17640define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17641; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17642; VLX:       # %bb.0: # %entry
17643; VLX-NEXT:    kmovd %edi, %k1
17644; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17645; VLX-NEXT:    kmovd %k0, %eax
17646; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17647; VLX-NEXT:    retq
17648;
17649; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17650; NoVLX:       # %bb.0: # %entry
17651; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17652; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17653; NoVLX-NEXT:    kmovw %edi, %k1
17654; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17655; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17656; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17657; NoVLX-NEXT:    kmovw %k0, %eax
17658; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17659; NoVLX-NEXT:    vzeroupper
17660; NoVLX-NEXT:    retq
17661entry:
17662  %0 = bitcast <2 x i64> %__a to <2 x i64>
17663  %1 = bitcast <2 x i64> %__b to <2 x i64>
17664  %2 = icmp ult <2 x i64> %0, %1
17665  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17666  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17667  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17668  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17669  %6 = bitcast <16 x i1> %5 to i16
17670  ret i16 %6
17671}
17672
; Masked memory form: %__a is compared (icmp ult) against a <2 x i64> loaded
; from %__b, the result is ANDed with the low two bits of %__u, zero-padded
; to <16 x i1> and returned as i16.
; The VLX checks expect the load folded into a {%k1}-masked vpcmpltuq; the
; NoVLX checks expect vmovdqa into xmm1, a masked zmm compare, and
; kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17673define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17674; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17675; VLX:       # %bb.0: # %entry
17676; VLX-NEXT:    kmovd %edi, %k1
17677; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17678; VLX-NEXT:    kmovd %k0, %eax
17679; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17680; VLX-NEXT:    retq
17681;
17682; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17683; NoVLX:       # %bb.0: # %entry
17684; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17685; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17686; NoVLX-NEXT:    kmovw %edi, %k1
17687; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17688; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17689; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17690; NoVLX-NEXT:    kmovw %k0, %eax
17691; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17692; NoVLX-NEXT:    vzeroupper
17693; NoVLX-NEXT:    retq
17694entry:
17695  %0 = bitcast <2 x i64> %__a to <2 x i64>
17696  %load = load <2 x i64>, ptr %__b
17697  %1 = bitcast <2 x i64> %load to <2 x i64>
17698  %2 = icmp ult <2 x i64> %0, %1
17699  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17700  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17701  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17702  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17703  %6 = bitcast <16 x i1> %5 to i16
17704  ret i16 %6
17705}
17706
17707
; Broadcast form: a scalar i64 is loaded from %__b and splatted to both lanes
; before the icmp ult against %__a. The result is zero-padded to <16 x i1>
; and returned as i16.
; The checks expect an embedded-broadcast memory operand on vpcmpltuq:
; {1to2} with xmm for the VLX run and {1to8} with zmm for the NoVLX run,
; the latter followed by kshiftlw/kshiftrw by 14.
17708define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17709; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17710; VLX:       # %bb.0: # %entry
17711; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17712; VLX-NEXT:    kmovd %k0, %eax
17713; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17714; VLX-NEXT:    retq
17715;
17716; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17717; NoVLX:       # %bb.0: # %entry
17718; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17719; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17720; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17721; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17722; NoVLX-NEXT:    kmovw %k0, %eax
17723; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17724; NoVLX-NEXT:    vzeroupper
17725; NoVLX-NEXT:    retq
17726entry:
17727  %0 = bitcast <2 x i64> %__a to <2 x i64>
17728  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17729  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17730  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17731  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17732  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17733  %4 = bitcast <16 x i1> %3 to i16
17734  ret i16 %4
17735}
17736
; Masked broadcast form: a scalar i64 loaded from %__b is splatted to both
; lanes, compared (icmp ult) against %__a, ANDed with the low two bits of
; %__u, zero-padded to <16 x i1> and returned as i16.
; The checks expect a {%k1}-masked vpcmpltuq with an embedded-broadcast
; operand: {1to2} for the VLX run, {1to8} plus kshiftlw/kshiftrw by 14 for
; the NoVLX run.
17737define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17738; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17739; VLX:       # %bb.0: # %entry
17740; VLX-NEXT:    kmovd %edi, %k1
17741; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17742; VLX-NEXT:    kmovd %k0, %eax
17743; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17744; VLX-NEXT:    retq
17745;
17746; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17747; NoVLX:       # %bb.0: # %entry
17748; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17749; NoVLX-NEXT:    kmovw %edi, %k1
17750; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17751; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17752; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17753; NoVLX-NEXT:    kmovw %k0, %eax
17754; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17755; NoVLX-NEXT:    vzeroupper
17756; NoVLX-NEXT:    retq
17757entry:
17758  %0 = bitcast <2 x i64> %__a to <2 x i64>
17759  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17760  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17761  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17762  %2 = icmp ult <2 x i64> %0, %1
17763  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17764  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17765  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17766  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17767  %6 = bitcast <16 x i1> %5 to i16
17768  ret i16 %6
17769}
17770
17771
; Register/register form: icmp ult on two <2 x i64> arguments, with the
; <2 x i1> result zero-padded to <32 x i1> and returned as an i32 bitmask.
; The VLX checks expect a single xmm vpcmpltuq into %k0 read back with kmovd;
; the NoVLX checks expect a zmm compare followed by kshiftlw/kshiftrw by 14,
; which leaves only the low two bits of the 16-bit mask.
17772define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17773; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17774; VLX:       # %bb.0: # %entry
17775; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17776; VLX-NEXT:    kmovd %k0, %eax
17777; VLX-NEXT:    retq
17778;
17779; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17780; NoVLX:       # %bb.0: # %entry
17781; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17782; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17783; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17784; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17785; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17786; NoVLX-NEXT:    kmovw %k0, %eax
17787; NoVLX-NEXT:    vzeroupper
17788; NoVLX-NEXT:    retq
17789entry:
17790  %0 = bitcast <2 x i64> %__a to <2 x i64>
17791  %1 = bitcast <2 x i64> %__b to <2 x i64>
17792  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17793  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17794  %4 = bitcast <32 x i1> %3 to i32
17795  ret i32 %4
17796}
17797
; Memory form: the right-hand operand is a full <2 x i64> load from %__b.
; The compare result is zero-padded to <32 x i1> and returned as an i32 bitmask.
; The VLX checks expect the load folded into vpcmpltuq; the NoVLX checks
; expect vmovdqa into xmm1, a zmm compare, and kshiftlw/kshiftrw by 14 to
; keep only the low two mask bits.
17798define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17799; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17800; VLX:       # %bb.0: # %entry
17801; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17802; VLX-NEXT:    kmovd %k0, %eax
17803; VLX-NEXT:    retq
17804;
17805; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17806; NoVLX:       # %bb.0: # %entry
17807; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17808; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17809; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17810; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17811; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17812; NoVLX-NEXT:    kmovw %k0, %eax
17813; NoVLX-NEXT:    vzeroupper
17814; NoVLX-NEXT:    retq
17815entry:
17816  %0 = bitcast <2 x i64> %__a to <2 x i64>
17817  %load = load <2 x i64>, ptr %__b
17818  %1 = bitcast <2 x i64> %load to <2 x i64>
17819  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17820  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17821  %4 = bitcast <32 x i1> %3 to i32
17822  ret i32 %4
17823}
17824
; Masked register/register form: icmp ult on two <2 x i64> arguments, ANDed
; with the low two bits of %__u, zero-padded to <32 x i1> and returned as i32.
; Both check variants expect %edi moved into %k1 and used as the {%k1}
; write mask of vpcmpltuq; the NoVLX checks additionally expect zmm operands
; and kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17825define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17826; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17827; VLX:       # %bb.0: # %entry
17828; VLX-NEXT:    kmovd %edi, %k1
17829; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17830; VLX-NEXT:    kmovd %k0, %eax
17831; VLX-NEXT:    retq
17832;
17833; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17834; NoVLX:       # %bb.0: # %entry
17835; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17836; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17837; NoVLX-NEXT:    kmovw %edi, %k1
17838; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17839; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17840; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17841; NoVLX-NEXT:    kmovw %k0, %eax
17842; NoVLX-NEXT:    vzeroupper
17843; NoVLX-NEXT:    retq
17844entry:
17845  %0 = bitcast <2 x i64> %__a to <2 x i64>
17846  %1 = bitcast <2 x i64> %__b to <2 x i64>
17847  %2 = icmp ult <2 x i64> %0, %1
17848  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17849  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17850  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17851  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17852  %6 = bitcast <32 x i1> %5 to i32
17853  ret i32 %6
17854}
17855
; Masked memory form: %__a is compared (icmp ult) against a <2 x i64> loaded
; from %__b, the result is ANDed with the low two bits of %__u, zero-padded
; to <32 x i1> and returned as i32.
; The VLX checks expect the load folded into a {%k1}-masked vpcmpltuq; the
; NoVLX checks expect vmovdqa into xmm1, a masked zmm compare, and
; kshiftlw/kshiftrw by 14 to keep only the low two mask bits.
17856define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17857; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17858; VLX:       # %bb.0: # %entry
17859; VLX-NEXT:    kmovd %edi, %k1
17860; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17861; VLX-NEXT:    kmovd %k0, %eax
17862; VLX-NEXT:    retq
17863;
17864; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17865; NoVLX:       # %bb.0: # %entry
17866; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17867; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17868; NoVLX-NEXT:    kmovw %edi, %k1
17869; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17870; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17871; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17872; NoVLX-NEXT:    kmovw %k0, %eax
17873; NoVLX-NEXT:    vzeroupper
17874; NoVLX-NEXT:    retq
17875entry:
17876  %0 = bitcast <2 x i64> %__a to <2 x i64>
17877  %load = load <2 x i64>, ptr %__b
17878  %1 = bitcast <2 x i64> %load to <2 x i64>
17879  %2 = icmp ult <2 x i64> %0, %1
17880  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17881  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17882  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17883  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17884  %6 = bitcast <32 x i1> %5 to i32
17885  ret i32 %6
17886}
17887
17888
; Broadcast form: a scalar i64 is loaded from %__b and splatted to both lanes
; before the icmp ult against %__a. The result is zero-padded to <32 x i1>
; and returned as i32.
; The checks expect an embedded-broadcast memory operand on vpcmpltuq:
; {1to2} with xmm for the VLX run and {1to8} with zmm for the NoVLX run,
; the latter followed by kshiftlw/kshiftrw by 14.
17889define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17890; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17891; VLX:       # %bb.0: # %entry
17892; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17893; VLX-NEXT:    kmovd %k0, %eax
17894; VLX-NEXT:    retq
17895;
17896; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17897; NoVLX:       # %bb.0: # %entry
17898; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17899; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17900; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17901; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17902; NoVLX-NEXT:    kmovw %k0, %eax
17903; NoVLX-NEXT:    vzeroupper
17904; NoVLX-NEXT:    retq
17905entry:
17906  %0 = bitcast <2 x i64> %__a to <2 x i64>
17907  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17908  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17909  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17910  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17911  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17912  %4 = bitcast <32 x i1> %3 to i32
17913  ret i32 %4
17914}
17915
; Masked broadcast form: a scalar i64 loaded from %__b is splatted to both
; lanes, compared (icmp ult) against %__a, ANDed with the low two bits of
; %__u, zero-padded to <32 x i1> and returned as i32.
; The checks expect a {%k1}-masked vpcmpltuq with an embedded-broadcast
; operand: {1to2} for the VLX run, {1to8} plus kshiftlw/kshiftrw by 14 for
; the NoVLX run.
17916define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17917; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17918; VLX:       # %bb.0: # %entry
17919; VLX-NEXT:    kmovd %edi, %k1
17920; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17921; VLX-NEXT:    kmovd %k0, %eax
17922; VLX-NEXT:    retq
17923;
17924; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17925; NoVLX:       # %bb.0: # %entry
17926; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17927; NoVLX-NEXT:    kmovw %edi, %k1
17928; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17929; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17930; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17931; NoVLX-NEXT:    kmovw %k0, %eax
17932; NoVLX-NEXT:    vzeroupper
17933; NoVLX-NEXT:    retq
17934entry:
17935  %0 = bitcast <2 x i64> %__a to <2 x i64>
17936  %load = load i64, ptr %__b
  ; Splat the loaded scalar into both lanes of the right-hand operand.
17937  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17938  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17939  %2 = icmp ult <2 x i64> %0, %1
17940  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only mask bits 0 and 1 of %__u, matching the two compared lanes.
17941  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17942  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 pick lanes of the zeroinitializer operand, so the padding is all zero.
17943  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17944  %6 = bitcast <32 x i1> %5 to i32
17945  ret i32 %6
17946}
17947
17948
17949define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17950; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17951; VLX:       # %bb.0: # %entry
17952; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17953; VLX-NEXT:    kmovq %k0, %rax
17954; VLX-NEXT:    retq
17955;
17956; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17957; NoVLX:       # %bb.0: # %entry
17958; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17959; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17960; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17961; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17962; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17963; NoVLX-NEXT:    kmovw %k0, %eax
17964; NoVLX-NEXT:    vzeroupper
17965; NoVLX-NEXT:    retq
17966entry:
17967  %0 = bitcast <2 x i64> %__a to <2 x i64>
17968  %1 = bitcast <2 x i64> %__b to <2 x i64>
17969  %2 = icmp ult <2 x i64> %0, %1
17970  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17971  %4 = bitcast <64 x i1> %3 to i64
17972  ret i64 %4
17973}
17974
17975define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17976; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17977; VLX:       # %bb.0: # %entry
17978; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17979; VLX-NEXT:    kmovq %k0, %rax
17980; VLX-NEXT:    retq
17981;
17982; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17983; NoVLX:       # %bb.0: # %entry
17984; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17985; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17986; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17987; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17988; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17989; NoVLX-NEXT:    kmovw %k0, %eax
17990; NoVLX-NEXT:    vzeroupper
17991; NoVLX-NEXT:    retq
17992entry:
17993  %0 = bitcast <2 x i64> %__a to <2 x i64>
17994  %load = load <2 x i64>, ptr %__b
17995  %1 = bitcast <2 x i64> %load to <2 x i64>
17996  %2 = icmp ult <2 x i64> %0, %1
17997  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17998  %4 = bitcast <64 x i1> %3 to i64
17999  ret i64 %4
18000}
18001
18002define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18003; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18004; VLX:       # %bb.0: # %entry
18005; VLX-NEXT:    kmovd %edi, %k1
18006; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18007; VLX-NEXT:    kmovq %k0, %rax
18008; VLX-NEXT:    retq
18009;
18010; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18011; NoVLX:       # %bb.0: # %entry
18012; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18013; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18014; NoVLX-NEXT:    kmovw %edi, %k1
18015; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18016; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18017; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18018; NoVLX-NEXT:    kmovw %k0, %eax
18019; NoVLX-NEXT:    vzeroupper
18020; NoVLX-NEXT:    retq
18021entry:
18022  %0 = bitcast <2 x i64> %__a to <2 x i64>
18023  %1 = bitcast <2 x i64> %__b to <2 x i64>
18024  %2 = icmp ult <2 x i64> %0, %1
18025  %3 = bitcast i8 %__u to <8 x i1>
18026  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18027  %4 = and <2 x i1> %2, %extract.i
18028  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18029  %6 = bitcast <64 x i1> %5 to i64
18030  ret i64 %6
18031}
18032
18033define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
18034; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18035; VLX:       # %bb.0: # %entry
18036; VLX-NEXT:    kmovd %edi, %k1
18037; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18038; VLX-NEXT:    kmovq %k0, %rax
18039; VLX-NEXT:    retq
18040;
18041; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18042; NoVLX:       # %bb.0: # %entry
18043; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18044; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
18045; NoVLX-NEXT:    kmovw %edi, %k1
18046; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18047; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18048; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18049; NoVLX-NEXT:    kmovw %k0, %eax
18050; NoVLX-NEXT:    vzeroupper
18051; NoVLX-NEXT:    retq
18052entry:
18053  %0 = bitcast <2 x i64> %__a to <2 x i64>
18054  %load = load <2 x i64>, ptr %__b
18055  %1 = bitcast <2 x i64> %load to <2 x i64>
18056  %2 = icmp ult <2 x i64> %0, %1
18057  %3 = bitcast i8 %__u to <8 x i1>
18058  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18059  %4 = and <2 x i1> %2, %extract.i
18060  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18061  %6 = bitcast <64 x i1> %5 to i64
18062  ret i64 %6
18063}
18064
18065
18066define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
18067; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18068; VLX:       # %bb.0: # %entry
18069; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18070; VLX-NEXT:    kmovq %k0, %rax
18071; VLX-NEXT:    retq
18072;
18073; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18074; NoVLX:       # %bb.0: # %entry
18075; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18076; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18077; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18078; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18079; NoVLX-NEXT:    kmovw %k0, %eax
18080; NoVLX-NEXT:    vzeroupper
18081; NoVLX-NEXT:    retq
18082entry:
18083  %0 = bitcast <2 x i64> %__a to <2 x i64>
18084  %load = load i64, ptr %__b
18085  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18086  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18087  %2 = icmp ult <2 x i64> %0, %1
18088  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18089  %4 = bitcast <64 x i1> %3 to i64
18090  ret i64 %4
18091}
18092
18093define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
18094; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18095; VLX:       # %bb.0: # %entry
18096; VLX-NEXT:    kmovd %edi, %k1
18097; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18098; VLX-NEXT:    kmovq %k0, %rax
18099; VLX-NEXT:    retq
18100;
18101; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18102; NoVLX:       # %bb.0: # %entry
18103; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18104; NoVLX-NEXT:    kmovw %edi, %k1
18105; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18106; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18107; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18108; NoVLX-NEXT:    kmovw %k0, %eax
18109; NoVLX-NEXT:    vzeroupper
18110; NoVLX-NEXT:    retq
18111entry:
18112  %0 = bitcast <2 x i64> %__a to <2 x i64>
18113  %load = load i64, ptr %__b
18114  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18115  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18116  %2 = icmp ult <2 x i64> %0, %1
18117  %3 = bitcast i8 %__u to <8 x i1>
18118  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18119  %4 = and <2 x i1> %extract.i, %2
18120  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18121  %6 = bitcast <64 x i1> %5 to i64
18122  ret i64 %6
18123}
18124
18125
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is zero-padded to <8 x i1> and returned as i8.
; Expected code: the AVX512VL run compares in ymm; the AVX512F-only run widens
; both operands to zmm, compares, and keeps just the low 4 mask bits with a
; kshiftlw/kshiftrw pair by 12.
18126define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18127; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18128; VLX:       # %bb.0: # %entry
18129; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18130; VLX-NEXT:    kmovd %k0, %eax
18131; VLX-NEXT:    # kill: def $al killed $al killed $eax
18132; VLX-NEXT:    vzeroupper
18133; VLX-NEXT:    retq
18134;
18135; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18136; NoVLX:       # %bb.0: # %entry
18137; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18138; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18139; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18140; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18141; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18142; NoVLX-NEXT:    kmovw %k0, %eax
18143; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18144; NoVLX-NEXT:    vzeroupper
18145; NoVLX-NEXT:    retq
18146entry:
18147  %0 = bitcast <4 x i64> %__a to <4 x i64>
18148  %1 = bitcast <4 x i64> %__b to <4 x i64>
18149  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18150  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18151  %4 = bitcast <8 x i1> %3 to i8
18152  ret i8 %4
18153}
18154
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b; the
; <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
; Expected code: the AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into ymm1 with vmovdqa, compares in zmm, and keeps
; just the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18155define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18156; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18157; VLX:       # %bb.0: # %entry
18158; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18159; VLX-NEXT:    kmovd %k0, %eax
18160; VLX-NEXT:    # kill: def $al killed $al killed $eax
18161; VLX-NEXT:    vzeroupper
18162; VLX-NEXT:    retq
18163;
18164; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18165; NoVLX:       # %bb.0: # %entry
18166; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18167; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18168; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18169; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18170; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18171; NoVLX-NEXT:    kmovw %k0, %eax
18172; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18173; NoVLX-NEXT:    vzeroupper
18174; NoVLX-NEXT:    retq
18175entry:
18176  %0 = bitcast <4 x i64> %__a to <4 x i64>
18177  %load = load <4 x i64>, ptr %__b
18178  %1 = bitcast <4 x i64> %load to <4 x i64>
18179  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18180  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18181  %4 = bitcast <8 x i1> %3 to i8
18182  ret i8 %4
18183}
18184
; Masked variant: unsigned less-than compare of two <4 x i64> register
; operands, ANDed with the low 4 bits of %__u, zero-padded to <8 x i1> and
; returned as i8.
; Expected code: %__u (in edi) is moved into %k1 and used as the compare's
; write-mask; the AVX512F-only run widens both operands to zmm and keeps just
; the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18185define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18186; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18187; VLX:       # %bb.0: # %entry
18188; VLX-NEXT:    kmovd %edi, %k1
18189; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18190; VLX-NEXT:    kmovd %k0, %eax
18191; VLX-NEXT:    # kill: def $al killed $al killed $eax
18192; VLX-NEXT:    vzeroupper
18193; VLX-NEXT:    retq
18194;
18195; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18196; NoVLX:       # %bb.0: # %entry
18197; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18198; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18199; NoVLX-NEXT:    kmovw %edi, %k1
18200; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18201; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18202; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18203; NoVLX-NEXT:    kmovw %k0, %eax
18204; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18205; NoVLX-NEXT:    vzeroupper
18206; NoVLX-NEXT:    retq
18207entry:
18208  %0 = bitcast <4 x i64> %__a to <4 x i64>
18209  %1 = bitcast <4 x i64> %__b to <4 x i64>
18210  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18211  %3 = bitcast i8 %__u to <8 x i1>
18212  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18213  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18214  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18215  %6 = bitcast <8 x i1> %5 to i8
18216  ret i8 %6
18217}
18218
; Masked variant: unsigned less-than compare of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low 4 bits of %__u, zero-padded to
; <8 x i1> and returned as i8.
; Expected code: %__u (in edi) becomes write-mask %k1. The AVX512VL run folds
; the load into the ymm compare; the AVX512F-only run loads with vmovdqa,
; compares in zmm, and keeps just the low 4 mask bits with a
; kshiftlw/kshiftrw pair by 12.
18219define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18220; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18221; VLX:       # %bb.0: # %entry
18222; VLX-NEXT:    kmovd %edi, %k1
18223; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18224; VLX-NEXT:    kmovd %k0, %eax
18225; VLX-NEXT:    # kill: def $al killed $al killed $eax
18226; VLX-NEXT:    vzeroupper
18227; VLX-NEXT:    retq
18228;
18229; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18230; NoVLX:       # %bb.0: # %entry
18231; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18232; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18233; NoVLX-NEXT:    kmovw %edi, %k1
18234; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18235; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18236; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18237; NoVLX-NEXT:    kmovw %k0, %eax
18238; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18239; NoVLX-NEXT:    vzeroupper
18240; NoVLX-NEXT:    retq
18241entry:
18242  %0 = bitcast <4 x i64> %__a to <4 x i64>
18243  %load = load <4 x i64>, ptr %__b
18244  %1 = bitcast <4 x i64> %load to <4 x i64>
18245  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18246  %3 = bitcast i8 %__u to <8 x i1>
18247  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18248  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18249  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18250  %6 = bitcast <8 x i1> %5 to i8
18251  ret i8 %6
18252}
18253
18254
; Unsigned less-than compare of %__a against a splat of the i64 loaded from
; %__b; the <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
; Expected code: the AVX512VL run uses a 256-bit {1to4} broadcast compare; the
; AVX512F-only run uses a zmm {1to8} compare followed by a kshiftlw/kshiftrw
; pair by 12, which keeps just the low 4 mask bits.
18255define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18256; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18257; VLX:       # %bb.0: # %entry
18258; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18259; VLX-NEXT:    kmovd %k0, %eax
18260; VLX-NEXT:    # kill: def $al killed $al killed $eax
18261; VLX-NEXT:    vzeroupper
18262; VLX-NEXT:    retq
18263;
18264; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18265; NoVLX:       # %bb.0: # %entry
18266; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18267; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18268; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18269; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18270; NoVLX-NEXT:    kmovw %k0, %eax
18271; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18272; NoVLX-NEXT:    vzeroupper
18273; NoVLX-NEXT:    retq
18274entry:
18275  %0 = bitcast <4 x i64> %__a to <4 x i64>
18276  %load = load i64, ptr %__b
  ; Splat the loaded scalar into all four lanes.
18277  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18278  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18279  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18280  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18281  %4 = bitcast <8 x i1> %3 to i8
18282  ret i8 %4
18283}
18284
; Masked variant: unsigned less-than compare of %__a against a splat of the
; i64 loaded from %__b, ANDed with the low 4 bits of %__u, zero-padded to
; <8 x i1> and returned as i8.
; Expected code: %__u (in edi) is moved into %k1 and used as the write-mask of
; the broadcast compare; the AVX512F-only run additionally widens to zmm and
; clears all but the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18285define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18286; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18287; VLX:       # %bb.0: # %entry
18288; VLX-NEXT:    kmovd %edi, %k1
18289; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18290; VLX-NEXT:    kmovd %k0, %eax
18291; VLX-NEXT:    # kill: def $al killed $al killed $eax
18292; VLX-NEXT:    vzeroupper
18293; VLX-NEXT:    retq
18294;
18295; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18296; NoVLX:       # %bb.0: # %entry
18297; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18298; NoVLX-NEXT:    kmovw %edi, %k1
18299; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18300; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18301; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18302; NoVLX-NEXT:    kmovw %k0, %eax
18303; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18304; NoVLX-NEXT:    vzeroupper
18305; NoVLX-NEXT:    retq
18306entry:
18307  %0 = bitcast <4 x i64> %__a to <4 x i64>
18308  %load = load i64, ptr %__b
  ; Splat the loaded scalar into all four lanes.
18309  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18310  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18311  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18312  %3 = bitcast i8 %__u to <8 x i1>
18313  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18314  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18315  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18316  %6 = bitcast <8 x i1> %5 to i8
18317  ret i8 %6
18318}
18319
18320
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is zero-padded to <16 x i1> and returned as i16.
; Expected code: the AVX512VL run compares in ymm; the AVX512F-only run widens
; both operands to zmm, compares, and keeps just the low 4 mask bits with a
; kshiftlw/kshiftrw pair by 12.
18321define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18322; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18323; VLX:       # %bb.0: # %entry
18324; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18325; VLX-NEXT:    kmovd %k0, %eax
18326; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18327; VLX-NEXT:    vzeroupper
18328; VLX-NEXT:    retq
18329;
18330; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18331; NoVLX:       # %bb.0: # %entry
18332; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18333; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18334; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18335; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18336; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18337; NoVLX-NEXT:    kmovw %k0, %eax
18338; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18339; NoVLX-NEXT:    vzeroupper
18340; NoVLX-NEXT:    retq
18341entry:
18342  %0 = bitcast <4 x i64> %__a to <4 x i64>
18343  %1 = bitcast <4 x i64> %__b to <4 x i64>
18344  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18345  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18346  %4 = bitcast <16 x i1> %3 to i16
18347  ret i16 %4
18348}
18349
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b; the
; <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
; Expected code: the AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into ymm1 with vmovdqa, compares in zmm, and keeps
; just the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18350define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18351; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18352; VLX:       # %bb.0: # %entry
18353; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18354; VLX-NEXT:    kmovd %k0, %eax
18355; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18356; VLX-NEXT:    vzeroupper
18357; VLX-NEXT:    retq
18358;
18359; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18360; NoVLX:       # %bb.0: # %entry
18361; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18362; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18363; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18364; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18365; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18366; NoVLX-NEXT:    kmovw %k0, %eax
18367; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18368; NoVLX-NEXT:    vzeroupper
18369; NoVLX-NEXT:    retq
18370entry:
18371  %0 = bitcast <4 x i64> %__a to <4 x i64>
18372  %load = load <4 x i64>, ptr %__b
18373  %1 = bitcast <4 x i64> %load to <4 x i64>
18374  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18375  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18376  %4 = bitcast <16 x i1> %3 to i16
18377  ret i16 %4
18378}
18379
; Masked variant: unsigned less-than compare of two <4 x i64> register
; operands, ANDed with the low 4 bits of %__u, zero-padded to <16 x i1> and
; returned as i16.
; Expected code: %__u (in edi) is moved into %k1 and used as the compare's
; write-mask; the AVX512F-only run widens both operands to zmm and keeps just
; the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18380define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18381; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18382; VLX:       # %bb.0: # %entry
18383; VLX-NEXT:    kmovd %edi, %k1
18384; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18385; VLX-NEXT:    kmovd %k0, %eax
18386; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18387; VLX-NEXT:    vzeroupper
18388; VLX-NEXT:    retq
18389;
18390; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18391; NoVLX:       # %bb.0: # %entry
18392; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18393; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18394; NoVLX-NEXT:    kmovw %edi, %k1
18395; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18396; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18397; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18398; NoVLX-NEXT:    kmovw %k0, %eax
18399; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18400; NoVLX-NEXT:    vzeroupper
18401; NoVLX-NEXT:    retq
18402entry:
18403  %0 = bitcast <4 x i64> %__a to <4 x i64>
18404  %1 = bitcast <4 x i64> %__b to <4 x i64>
18405  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18406  %3 = bitcast i8 %__u to <8 x i1>
18407  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18408  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18409  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18410  %6 = bitcast <16 x i1> %5 to i16
18411  ret i16 %6
18412}
18413
; Masked variant: unsigned less-than compare of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low 4 bits of %__u, zero-padded to
; <16 x i1> and returned as i16.
; Expected code: %__u (in edi) becomes write-mask %k1. The AVX512VL run folds
; the load into the ymm compare; the AVX512F-only run loads with vmovdqa,
; compares in zmm, and keeps just the low 4 mask bits with a
; kshiftlw/kshiftrw pair by 12.
18414define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18415; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18416; VLX:       # %bb.0: # %entry
18417; VLX-NEXT:    kmovd %edi, %k1
18418; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18419; VLX-NEXT:    kmovd %k0, %eax
18420; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18421; VLX-NEXT:    vzeroupper
18422; VLX-NEXT:    retq
18423;
18424; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18425; NoVLX:       # %bb.0: # %entry
18426; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18427; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18428; NoVLX-NEXT:    kmovw %edi, %k1
18429; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18430; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18431; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18432; NoVLX-NEXT:    kmovw %k0, %eax
18433; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18434; NoVLX-NEXT:    vzeroupper
18435; NoVLX-NEXT:    retq
18436entry:
18437  %0 = bitcast <4 x i64> %__a to <4 x i64>
18438  %load = load <4 x i64>, ptr %__b
18439  %1 = bitcast <4 x i64> %load to <4 x i64>
18440  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18441  %3 = bitcast i8 %__u to <8 x i1>
18442  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18443  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18444  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18445  %6 = bitcast <16 x i1> %5 to i16
18446  ret i16 %6
18447}
18448
18449
; Unsigned less-than compare of %__a against a splat of the i64 loaded from
; %__b; the <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
; Expected code: the AVX512VL run uses a 256-bit {1to4} broadcast compare; the
; AVX512F-only run uses a zmm {1to8} compare followed by a kshiftlw/kshiftrw
; pair by 12, which keeps just the low 4 mask bits.
18450define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18451; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18452; VLX:       # %bb.0: # %entry
18453; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18454; VLX-NEXT:    kmovd %k0, %eax
18455; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18456; VLX-NEXT:    vzeroupper
18457; VLX-NEXT:    retq
18458;
18459; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18460; NoVLX:       # %bb.0: # %entry
18461; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18462; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18463; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18464; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18465; NoVLX-NEXT:    kmovw %k0, %eax
18466; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18467; NoVLX-NEXT:    vzeroupper
18468; NoVLX-NEXT:    retq
18469entry:
18470  %0 = bitcast <4 x i64> %__a to <4 x i64>
18471  %load = load i64, ptr %__b
  ; Splat the loaded scalar into all four lanes.
18472  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18473  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18474  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18475  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18476  %4 = bitcast <16 x i1> %3 to i16
18477  ret i16 %4
18478}
18479
; Masked variant: unsigned less-than compare of %__a against a splat of the
; i64 loaded from %__b, ANDed with the low 4 bits of %__u, zero-padded to
; <16 x i1> and returned as i16.
; Expected code: %__u (in edi) is moved into %k1 and used as the write-mask of
; the broadcast compare; the AVX512F-only run additionally widens to zmm and
; clears all but the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18480define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18481; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18482; VLX:       # %bb.0: # %entry
18483; VLX-NEXT:    kmovd %edi, %k1
18484; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18485; VLX-NEXT:    kmovd %k0, %eax
18486; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18487; VLX-NEXT:    vzeroupper
18488; VLX-NEXT:    retq
18489;
18490; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18491; NoVLX:       # %bb.0: # %entry
18492; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18493; NoVLX-NEXT:    kmovw %edi, %k1
18494; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18495; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18496; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18497; NoVLX-NEXT:    kmovw %k0, %eax
18498; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18499; NoVLX-NEXT:    vzeroupper
18500; NoVLX-NEXT:    retq
18501entry:
18502  %0 = bitcast <4 x i64> %__a to <4 x i64>
18503  %load = load i64, ptr %__b
  ; Splat the loaded scalar into all four lanes.
18504  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18505  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18506  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18507  %3 = bitcast i8 %__u to <8 x i1>
18508  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18509  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18510  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18511  %6 = bitcast <16 x i1> %5 to i16
18512  ret i16 %6
18513}
18514
18515
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is zero-padded to <32 x i1> and returned as i32.
; Expected code: the AVX512VL run compares in ymm; the AVX512F-only run widens
; both operands to zmm, compares, and keeps just the low 4 mask bits with a
; kshiftlw/kshiftrw pair by 12.
18516define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18517; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18518; VLX:       # %bb.0: # %entry
18519; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18520; VLX-NEXT:    kmovd %k0, %eax
18521; VLX-NEXT:    vzeroupper
18522; VLX-NEXT:    retq
18523;
18524; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18525; NoVLX:       # %bb.0: # %entry
18526; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18527; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18528; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18529; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18530; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18531; NoVLX-NEXT:    kmovw %k0, %eax
18532; NoVLX-NEXT:    vzeroupper
18533; NoVLX-NEXT:    retq
18534entry:
18535  %0 = bitcast <4 x i64> %__a to <4 x i64>
18536  %1 = bitcast <4 x i64> %__b to <4 x i64>
18537  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18538  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18539  %4 = bitcast <32 x i1> %3 to i32
18540  ret i32 %4
18541}
18542
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b; the
; <4 x i1> result is zero-padded to <32 x i1> and returned as i32.
; Expected code: the AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into ymm1 with vmovdqa, compares in zmm, and keeps
; just the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18543define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18544; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18545; VLX:       # %bb.0: # %entry
18546; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18547; VLX-NEXT:    kmovd %k0, %eax
18548; VLX-NEXT:    vzeroupper
18549; VLX-NEXT:    retq
18550;
18551; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18552; NoVLX:       # %bb.0: # %entry
18553; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18554; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18555; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18556; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18557; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18558; NoVLX-NEXT:    kmovw %k0, %eax
18559; NoVLX-NEXT:    vzeroupper
18560; NoVLX-NEXT:    retq
18561entry:
18562  %0 = bitcast <4 x i64> %__a to <4 x i64>
18563  %load = load <4 x i64>, ptr %__b
18564  %1 = bitcast <4 x i64> %load to <4 x i64>
18565  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18566  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18567  %4 = bitcast <32 x i1> %3 to i32
18568  ret i32 %4
18569}
18570
; Masked variant: unsigned less-than compare of two <4 x i64> register
; operands, ANDed with the low 4 bits of %__u, zero-padded to <32 x i1> and
; returned as i32.
; Expected code: %__u (in edi) is moved into %k1 and used as the compare's
; write-mask; the AVX512F-only run widens both operands to zmm and keeps just
; the low 4 mask bits with a kshiftlw/kshiftrw pair by 12.
18571define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18572; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18573; VLX:       # %bb.0: # %entry
18574; VLX-NEXT:    kmovd %edi, %k1
18575; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18576; VLX-NEXT:    kmovd %k0, %eax
18577; VLX-NEXT:    vzeroupper
18578; VLX-NEXT:    retq
18579;
18580; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18581; NoVLX:       # %bb.0: # %entry
18582; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18583; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18584; NoVLX-NEXT:    kmovw %edi, %k1
18585; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18586; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18587; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18588; NoVLX-NEXT:    kmovw %k0, %eax
18589; NoVLX-NEXT:    vzeroupper
18590; NoVLX-NEXT:    retq
18591entry:
18592  %0 = bitcast <4 x i64> %__a to <4 x i64>
18593  %1 = bitcast <4 x i64> %__b to <4 x i64>
18594  %2 = icmp ult <4 x i64> %0, %1
  ; Take lanes 0-3 of the i8 mask viewed as <8 x i1>.
18595  %3 = bitcast i8 %__u to <8 x i1>
18596  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18597  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select lanes of the zeroinitializer operand, so only result
  ; bits 0-3 can be set.
18598  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18599  %6 = bitcast <32 x i1> %5 to i32
18600  ret i32 %6
18601}
18602
; Masked unsigned less-than (icmp ult) of %__a against a <4 x i64> loaded from
; %__b. The low 4 bits of %__u are ANDed with the compare result, which is
; widened to 32 lanes and returned as an i32.
; Expected assembly with +avx512vl folds the load into a write-masked ymm
; vpcmpltuq; with only +avx512f the load is a separate vmovdqa followed by a
; write-masked zmm compare and a kshiftlw/kshiftrw $12 pair.
18603define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18604; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18605; VLX:       # %bb.0: # %entry
18606; VLX-NEXT:    kmovd %edi, %k1
18607; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18608; VLX-NEXT:    kmovd %k0, %eax
18609; VLX-NEXT:    vzeroupper
18610; VLX-NEXT:    retq
18611;
18612; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18613; NoVLX:       # %bb.0: # %entry
18614; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18615; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18616; NoVLX-NEXT:    kmovw %edi, %k1
18617; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18618; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18619; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18620; NoVLX-NEXT:    kmovw %k0, %eax
18621; NoVLX-NEXT:    vzeroupper
18622; NoVLX-NEXT:    retq
18623entry:
18624  %0 = bitcast <4 x i64> %__a to <4 x i64>
18625  %load = load <4 x i64>, ptr %__b
18626  %1 = bitcast <4 x i64> %load to <4 x i64>
18627  %2 = icmp ult <4 x i64> %0, %1
  ; View the i8 mask as 8 lanes and keep lanes 0-3 to match the compare width.
18628  %3 = bitcast i8 %__u to <8 x i1>
18629  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18630  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-31 are zero.
18631  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18632  %6 = bitcast <32 x i1> %5 to i32
18633  ret i32 %6
18634}
18635
18636
; Unsigned less-than (icmp ult) of %__a against a single i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to 32 lanes and
; returned as an i32.
; Expected assembly uses an embedded-broadcast memory operand: {1to4} on a ymm
; compare with +avx512vl, {1to8} on a zmm compare (plus kshiftlw/kshiftrw $12)
; with only +avx512f.
18637define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18638; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18639; VLX:       # %bb.0: # %entry
18640; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18641; VLX-NEXT:    kmovd %k0, %eax
18642; VLX-NEXT:    vzeroupper
18643; VLX-NEXT:    retq
18644;
18645; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18646; NoVLX:       # %bb.0: # %entry
18647; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18648; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18649; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18650; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18651; NoVLX-NEXT:    kmovw %k0, %eax
18652; NoVLX-NEXT:    vzeroupper
18653; NoVLX-NEXT:    retq
18654entry:
18655  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
18656  %load = load i64, ptr %__b
18657  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18658  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18659  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-31 are zero.
18660  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18661  %4 = bitcast <32 x i1> %3 to i32
18662  ret i32 %4
18663}
18664
; Masked unsigned less-than (icmp ult) of %__a against a single i64 loaded
; from %__b and splatted to all 4 lanes. The low 4 bits of %__u are ANDed with
; the compare result (mask is the first AND operand here), which is widened to
; 32 lanes and returned as an i32.
; Expected assembly uses a write-masked vpcmpltuq with an embedded-broadcast
; memory operand: {1to4}/ymm with +avx512vl, {1to8}/zmm with only +avx512f.
18665define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18666; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18667; VLX:       # %bb.0: # %entry
18668; VLX-NEXT:    kmovd %edi, %k1
18669; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18670; VLX-NEXT:    kmovd %k0, %eax
18671; VLX-NEXT:    vzeroupper
18672; VLX-NEXT:    retq
18673;
18674; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18675; NoVLX:       # %bb.0: # %entry
18676; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18677; NoVLX-NEXT:    kmovw %edi, %k1
18678; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18679; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18680; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18681; NoVLX-NEXT:    kmovw %k0, %eax
18682; NoVLX-NEXT:    vzeroupper
18683; NoVLX-NEXT:    retq
18684entry:
18685  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
18686  %load = load i64, ptr %__b
18687  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18688  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18689  %2 = icmp ult <4 x i64> %0, %1
  ; View the i8 mask as 8 lanes and keep lanes 0-3 to match the compare width.
18690  %3 = bitcast i8 %__u to <8 x i1>
18691  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18692  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-31 are zero.
18693  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18694  %6 = bitcast <32 x i1> %5 to i32
18695  ret i32 %6
18696}
18697
18698
; Unsigned less-than (icmp ult) of two <4 x i64> register operands. The
; <4 x i1> result is widened to 64 lanes and returned as an i64.
; Expected assembly with +avx512vl compares ymm registers and reads the mask
; with kmovq; with only +avx512f the compare runs on zmm registers, a
; kshiftlw/kshiftrw $12 pair keeps the low 4 mask bits, and the mask is read
; with kmovw into %eax.
18699define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18700; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18701; VLX:       # %bb.0: # %entry
18702; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18703; VLX-NEXT:    kmovq %k0, %rax
18704; VLX-NEXT:    vzeroupper
18705; VLX-NEXT:    retq
18706;
18707; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18708; NoVLX:       # %bb.0: # %entry
18709; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18710; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18711; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18712; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18713; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18714; NoVLX-NEXT:    kmovw %k0, %eax
18715; NoVLX-NEXT:    vzeroupper
18716; NoVLX-NEXT:    retq
18717entry:
18718  %0 = bitcast <4 x i64> %__a to <4 x i64>
18719  %1 = bitcast <4 x i64> %__b to <4 x i64>
18720  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18721  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18722  %4 = bitcast <64 x i1> %3 to i64
18723  ret i64 %4
18724}
18725
; Unsigned less-than (icmp ult) of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 64 lanes and returned as an i64.
; Expected assembly with +avx512vl folds the load into a ymm vpcmpltuq and
; reads the mask with kmovq; with only +avx512f the load is a separate
; vmovdqa, the compare runs on zmm registers, and a kshiftlw/kshiftrw $12 pair
; precedes a kmovw into %eax.
18726define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18727; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18728; VLX:       # %bb.0: # %entry
18729; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18730; VLX-NEXT:    kmovq %k0, %rax
18731; VLX-NEXT:    vzeroupper
18732; VLX-NEXT:    retq
18733;
18734; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18735; NoVLX:       # %bb.0: # %entry
18736; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18737; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18738; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18739; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18740; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18741; NoVLX-NEXT:    kmovw %k0, %eax
18742; NoVLX-NEXT:    vzeroupper
18743; NoVLX-NEXT:    retq
18744entry:
18745  %0 = bitcast <4 x i64> %__a to <4 x i64>
18746  %load = load <4 x i64>, ptr %__b
18747  %1 = bitcast <4 x i64> %load to <4 x i64>
18748  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18749  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18750  %4 = bitcast <64 x i1> %3 to i64
18751  ret i64 %4
18752}
18753
; Masked unsigned less-than (icmp ult) of two <4 x i64> register operands.
; The low 4 bits of %__u are ANDed with the <4 x i1> compare result, which is
; then widened to 64 lanes and returned as an i64.
; Expected assembly moves %edi into %k1 and uses it as the write-mask of
; vpcmpltuq (ymm and kmovq with +avx512vl; zmm, kshiftlw/kshiftrw $12 and
; kmovw with only +avx512f).
18754define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18755; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18756; VLX:       # %bb.0: # %entry
18757; VLX-NEXT:    kmovd %edi, %k1
18758; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18759; VLX-NEXT:    kmovq %k0, %rax
18760; VLX-NEXT:    vzeroupper
18761; VLX-NEXT:    retq
18762;
18763; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18764; NoVLX:       # %bb.0: # %entry
18765; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18766; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18767; NoVLX-NEXT:    kmovw %edi, %k1
18768; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18769; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18770; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18771; NoVLX-NEXT:    kmovw %k0, %eax
18772; NoVLX-NEXT:    vzeroupper
18773; NoVLX-NEXT:    retq
18774entry:
18775  %0 = bitcast <4 x i64> %__a to <4 x i64>
18776  %1 = bitcast <4 x i64> %__b to <4 x i64>
18777  %2 = icmp ult <4 x i64> %0, %1
  ; View the i8 mask as 8 lanes and keep lanes 0-3 to match the compare width.
18778  %3 = bitcast i8 %__u to <8 x i1>
18779  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18780  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18781  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18782  %6 = bitcast <64 x i1> %5 to i64
18783  ret i64 %6
18784}
18785
; Masked unsigned less-than (icmp ult) of %__a against a <4 x i64> loaded from
; %__b. The low 4 bits of %__u are ANDed with the compare result, which is
; widened to 64 lanes and returned as an i64.
; Expected assembly with +avx512vl folds the load into a write-masked ymm
; vpcmpltuq and reads the mask with kmovq; with only +avx512f the load is a
; separate vmovdqa followed by a write-masked zmm compare, a
; kshiftlw/kshiftrw $12 pair and kmovw.
18786define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18787; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18788; VLX:       # %bb.0: # %entry
18789; VLX-NEXT:    kmovd %edi, %k1
18790; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18791; VLX-NEXT:    kmovq %k0, %rax
18792; VLX-NEXT:    vzeroupper
18793; VLX-NEXT:    retq
18794;
18795; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18796; NoVLX:       # %bb.0: # %entry
18797; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18798; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18799; NoVLX-NEXT:    kmovw %edi, %k1
18800; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18801; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18802; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18803; NoVLX-NEXT:    kmovw %k0, %eax
18804; NoVLX-NEXT:    vzeroupper
18805; NoVLX-NEXT:    retq
18806entry:
18807  %0 = bitcast <4 x i64> %__a to <4 x i64>
18808  %load = load <4 x i64>, ptr %__b
18809  %1 = bitcast <4 x i64> %load to <4 x i64>
18810  %2 = icmp ult <4 x i64> %0, %1
  ; View the i8 mask as 8 lanes and keep lanes 0-3 to match the compare width.
18811  %3 = bitcast i8 %__u to <8 x i1>
18812  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18813  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18814  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18815  %6 = bitcast <64 x i1> %5 to i64
18816  ret i64 %6
18817}
18818
18819
; Unsigned less-than (icmp ult) of %__a against a single i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to 64 lanes and
; returned as an i64.
; Expected assembly uses an embedded-broadcast memory operand: {1to4} on a ymm
; compare read back with kmovq with +avx512vl; {1to8} on a zmm compare plus
; kshiftlw/kshiftrw $12 and kmovw with only +avx512f.
18820define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18821; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18822; VLX:       # %bb.0: # %entry
18823; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18824; VLX-NEXT:    kmovq %k0, %rax
18825; VLX-NEXT:    vzeroupper
18826; VLX-NEXT:    retq
18827;
18828; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18829; NoVLX:       # %bb.0: # %entry
18830; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18831; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18832; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18833; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18834; NoVLX-NEXT:    kmovw %k0, %eax
18835; NoVLX-NEXT:    vzeroupper
18836; NoVLX-NEXT:    retq
18837entry:
18838  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
18839  %load = load i64, ptr %__b
18840  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18841  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18842  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18843  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18844  %4 = bitcast <64 x i1> %3 to i64
18845  ret i64 %4
18846}
18847
; Masked unsigned less-than (icmp ult) of %__a against a single i64 loaded
; from %__b and splatted to all 4 lanes. The low 4 bits of %__u are ANDed with
; the compare result (mask is the first AND operand here), which is widened to
; 64 lanes and returned as an i64.
; Expected assembly uses a write-masked vpcmpltuq with an embedded-broadcast
; memory operand: {1to4}/ymm and kmovq with +avx512vl; {1to8}/zmm,
; kshiftlw/kshiftrw $12 and kmovw with only +avx512f.
18848define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18849; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18850; VLX:       # %bb.0: # %entry
18851; VLX-NEXT:    kmovd %edi, %k1
18852; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18853; VLX-NEXT:    kmovq %k0, %rax
18854; VLX-NEXT:    vzeroupper
18855; VLX-NEXT:    retq
18856;
18857; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18858; NoVLX:       # %bb.0: # %entry
18859; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18860; NoVLX-NEXT:    kmovw %edi, %k1
18861; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18862; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18863; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18864; NoVLX-NEXT:    kmovw %k0, %eax
18865; NoVLX-NEXT:    vzeroupper
18866; NoVLX-NEXT:    retq
18867entry:
18868  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
18869  %load = load i64, ptr %__b
18870  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18871  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18872  %2 = icmp ult <4 x i64> %0, %1
  ; View the i8 mask as 8 lanes and keep lanes 0-3 to match the compare width.
18873  %3 = bitcast i8 %__u to <8 x i1>
18874  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18875  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of the
  ; zeroinitializer operand, so result lanes 4-63 are zero.
18876  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18877  %6 = bitcast <64 x i1> %5 to i64
18878  ret i64 %6
18879}
18880
18881
; Unsigned less-than (icmp ult) of two <8 x i64> register operands. The
; <8 x i1> result is widened to 16 lanes and returned as an i16.
; Both RUN configurations expect a single zmm vpcmpltuq; they differ only in
; the mask read-back (kmovd with +avx512vl etc., kmovw with only +avx512f).
18882define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18883; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18884; VLX:       # %bb.0: # %entry
18885; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18886; VLX-NEXT:    kmovd %k0, %eax
18887; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18888; VLX-NEXT:    vzeroupper
18889; VLX-NEXT:    retq
18890;
18891; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18892; NoVLX:       # %bb.0: # %entry
18893; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18894; NoVLX-NEXT:    kmovw %k0, %eax
18895; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18896; NoVLX-NEXT:    vzeroupper
18897; NoVLX-NEXT:    retq
18898entry:
18899  %0 = bitcast <8 x i64> %__a to <8 x i64>
18900  %1 = bitcast <8 x i64> %__b to <8 x i64>
18901  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
18902  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18903  %4 = bitcast <16 x i1> %3 to i16
18904  ret i16 %4
18905}
18906
; Unsigned less-than (icmp ult) of %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is widened to 16 lanes and returned as an i16.
; Both RUN configurations expect the load folded into a zmm vpcmpltuq; they
; differ only in the mask read-back (kmovd versus kmovw).
18907define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
18908; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18909; VLX:       # %bb.0: # %entry
18910; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
18911; VLX-NEXT:    kmovd %k0, %eax
18912; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18913; VLX-NEXT:    vzeroupper
18914; VLX-NEXT:    retq
18915;
18916; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18917; NoVLX:       # %bb.0: # %entry
18918; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
18919; NoVLX-NEXT:    kmovw %k0, %eax
18920; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18921; NoVLX-NEXT:    vzeroupper
18922; NoVLX-NEXT:    retq
18923entry:
18924  %0 = bitcast <8 x i64> %__a to <8 x i64>
18925  %load = load <8 x i64>, ptr %__b
18926  %1 = bitcast <8 x i64> %load to <8 x i64>
18927  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
18928  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18929  %4 = bitcast <16 x i1> %3 to i16
18930  ret i16 %4
18931}
18932
; Masked unsigned less-than (icmp ult) of two <8 x i64> register operands.
; All 8 bits of %__u are ANDed with the <8 x i1> compare result, which is then
; widened to 16 lanes and returned as an i16.
; Both RUN configurations expect %edi moved into %k1 and used as the
; write-mask of a zmm vpcmpltuq.
18933define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18934; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18935; VLX:       # %bb.0: # %entry
18936; VLX-NEXT:    kmovd %edi, %k1
18937; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18938; VLX-NEXT:    kmovd %k0, %eax
18939; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18940; VLX-NEXT:    vzeroupper
18941; VLX-NEXT:    retq
18942;
18943; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18944; NoVLX:       # %bb.0: # %entry
18945; NoVLX-NEXT:    kmovw %edi, %k1
18946; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18947; NoVLX-NEXT:    kmovw %k0, %eax
18948; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18949; NoVLX-NEXT:    vzeroupper
18950; NoVLX-NEXT:    retq
18951entry:
18952  %0 = bitcast <8 x i64> %__a to <8 x i64>
18953  %1 = bitcast <8 x i64> %__b to <8 x i64>
18954  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
18955  %3 = bitcast i8 %__u to <8 x i1>
18956  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
18957  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18958  %6 = bitcast <16 x i1> %5 to i16
18959  ret i16 %6
18960}
18961
; Masked unsigned less-than (icmp ult) of %__a against an <8 x i64> loaded
; from %__b. All 8 bits of %__u are ANDed with the compare result, which is
; widened to 16 lanes and returned as an i16.
; Both RUN configurations expect the load folded into a write-masked zmm
; vpcmpltuq.
18962define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
18963; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18964; VLX:       # %bb.0: # %entry
18965; VLX-NEXT:    kmovd %edi, %k1
18966; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18967; VLX-NEXT:    kmovd %k0, %eax
18968; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18969; VLX-NEXT:    vzeroupper
18970; VLX-NEXT:    retq
18971;
18972; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18973; NoVLX:       # %bb.0: # %entry
18974; NoVLX-NEXT:    kmovw %edi, %k1
18975; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18976; NoVLX-NEXT:    kmovw %k0, %eax
18977; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18978; NoVLX-NEXT:    vzeroupper
18979; NoVLX-NEXT:    retq
18980entry:
18981  %0 = bitcast <8 x i64> %__a to <8 x i64>
18982  %load = load <8 x i64>, ptr %__b
18983  %1 = bitcast <8 x i64> %load to <8 x i64>
18984  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
18985  %3 = bitcast i8 %__u to <8 x i1>
18986  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
18987  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18988  %6 = bitcast <16 x i1> %5 to i16
18989  ret i16 %6
18990}
18991
18992
; Unsigned less-than (icmp ult) of %__a against a single i64 loaded from %__b
; and splatted to all 8 lanes. The <8 x i1> result is widened to 16 lanes and
; returned as an i16.
; Both RUN configurations expect a zmm vpcmpltuq with a {1to8}
; embedded-broadcast memory operand.
18993define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
18994; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
18995; VLX:       # %bb.0: # %entry
18996; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18997; VLX-NEXT:    kmovd %k0, %eax
18998; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18999; VLX-NEXT:    vzeroupper
19000; VLX-NEXT:    retq
19001;
19002; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19003; NoVLX:       # %bb.0: # %entry
19004; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19005; NoVLX-NEXT:    kmovw %k0, %eax
19006; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19007; NoVLX-NEXT:    vzeroupper
19008; NoVLX-NEXT:    retq
19009entry:
19010  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
19011  %load = load i64, ptr %__b
19012  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19013  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19014  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
19015  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19016  %4 = bitcast <16 x i1> %3 to i16
19017  ret i16 %4
19018}
19019
; Masked unsigned less-than (icmp ult) of %__a against a single i64 loaded
; from %__b and splatted to all 8 lanes. All 8 bits of %__u are ANDed with the
; compare result (mask is the first AND operand here), which is widened to 16
; lanes and returned as an i16.
; Both RUN configurations expect a write-masked zmm vpcmpltuq with a {1to8}
; embedded-broadcast memory operand.
19020define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19021; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19022; VLX:       # %bb.0: # %entry
19023; VLX-NEXT:    kmovd %edi, %k1
19024; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19025; VLX-NEXT:    kmovd %k0, %eax
19026; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19027; VLX-NEXT:    vzeroupper
19028; VLX-NEXT:    retq
19029;
19030; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19031; NoVLX:       # %bb.0: # %entry
19032; NoVLX-NEXT:    kmovw %edi, %k1
19033; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19034; NoVLX-NEXT:    kmovw %k0, %eax
19035; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19036; NoVLX-NEXT:    vzeroupper
19037; NoVLX-NEXT:    retq
19038entry:
19039  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
19040  %load = load i64, ptr %__b
19041  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19042  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19043  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
19044  %3 = bitcast i8 %__u to <8 x i1>
19045  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are zero.
19046  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19047  %6 = bitcast <16 x i1> %5 to i16
19048  ret i16 %6
19049}
19050
19051
; Unsigned less-than (icmp ult) of two <8 x i64> register operands. The
; <8 x i1> result is widened to 32 lanes and returned as an i32.
; Both RUN configurations expect a single zmm vpcmpltuq; they differ only in
; the mask read-back (kmovd versus kmovw).
19052define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19053; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19054; VLX:       # %bb.0: # %entry
19055; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19056; VLX-NEXT:    kmovd %k0, %eax
19057; VLX-NEXT:    vzeroupper
19058; VLX-NEXT:    retq
19059;
19060; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19061; NoVLX:       # %bb.0: # %entry
19062; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19063; NoVLX-NEXT:    kmovw %k0, %eax
19064; NoVLX-NEXT:    vzeroupper
19065; NoVLX-NEXT:    retq
19066entry:
19067  %0 = bitcast <8 x i64> %__a to <8 x i64>
19068  %1 = bitcast <8 x i64> %__b to <8 x i64>
19069  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19070  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19071  %4 = bitcast <32 x i1> %3 to i32
19072  ret i32 %4
19073}
19074
; Unsigned less-than (icmp ult) of %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is widened to 32 lanes and returned as an i32.
; Both RUN configurations expect the load folded into a zmm vpcmpltuq; they
; differ only in the mask read-back (kmovd versus kmovw).
19075define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19076; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19077; VLX:       # %bb.0: # %entry
19078; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19079; VLX-NEXT:    kmovd %k0, %eax
19080; VLX-NEXT:    vzeroupper
19081; VLX-NEXT:    retq
19082;
19083; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19084; NoVLX:       # %bb.0: # %entry
19085; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19086; NoVLX-NEXT:    kmovw %k0, %eax
19087; NoVLX-NEXT:    vzeroupper
19088; NoVLX-NEXT:    retq
19089entry:
19090  %0 = bitcast <8 x i64> %__a to <8 x i64>
19091  %load = load <8 x i64>, ptr %__b
19092  %1 = bitcast <8 x i64> %load to <8 x i64>
19093  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19094  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19095  %4 = bitcast <32 x i1> %3 to i32
19096  ret i32 %4
19097}
19098
; Masked unsigned less-than (icmp ult) of two <8 x i64> register operands.
; All 8 bits of %__u are ANDed with the <8 x i1> compare result, which is then
; widened to 32 lanes and returned as an i32.
; Both RUN configurations expect %edi moved into %k1 and used as the
; write-mask of a zmm vpcmpltuq.
19099define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19100; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19101; VLX:       # %bb.0: # %entry
19102; VLX-NEXT:    kmovd %edi, %k1
19103; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19104; VLX-NEXT:    kmovd %k0, %eax
19105; VLX-NEXT:    vzeroupper
19106; VLX-NEXT:    retq
19107;
19108; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19109; NoVLX:       # %bb.0: # %entry
19110; NoVLX-NEXT:    kmovw %edi, %k1
19111; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19112; NoVLX-NEXT:    kmovw %k0, %eax
19113; NoVLX-NEXT:    vzeroupper
19114; NoVLX-NEXT:    retq
19115entry:
19116  %0 = bitcast <8 x i64> %__a to <8 x i64>
19117  %1 = bitcast <8 x i64> %__b to <8 x i64>
19118  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
19119  %3 = bitcast i8 %__u to <8 x i1>
19120  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19121  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19122  %6 = bitcast <32 x i1> %5 to i32
19123  ret i32 %6
19124}
19125
; Masked unsigned less-than (icmp ult) of %__a against an <8 x i64> loaded
; from %__b. All 8 bits of %__u are ANDed with the compare result, which is
; widened to 32 lanes and returned as an i32.
; Both RUN configurations expect the load folded into a write-masked zmm
; vpcmpltuq.
19126define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19127; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19128; VLX:       # %bb.0: # %entry
19129; VLX-NEXT:    kmovd %edi, %k1
19130; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19131; VLX-NEXT:    kmovd %k0, %eax
19132; VLX-NEXT:    vzeroupper
19133; VLX-NEXT:    retq
19134;
19135; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19136; NoVLX:       # %bb.0: # %entry
19137; NoVLX-NEXT:    kmovw %edi, %k1
19138; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19139; NoVLX-NEXT:    kmovw %k0, %eax
19140; NoVLX-NEXT:    vzeroupper
19141; NoVLX-NEXT:    retq
19142entry:
19143  %0 = bitcast <8 x i64> %__a to <8 x i64>
19144  %load = load <8 x i64>, ptr %__b
19145  %1 = bitcast <8 x i64> %load to <8 x i64>
19146  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
19147  %3 = bitcast i8 %__u to <8 x i1>
19148  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19149  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19150  %6 = bitcast <32 x i1> %5 to i32
19151  ret i32 %6
19152}
19153
19154
; Unsigned less-than (icmp ult) of %__a against a single i64 loaded from %__b
; and splatted to all 8 lanes. The <8 x i1> result is widened to 32 lanes and
; returned as an i32.
; Both RUN configurations expect a zmm vpcmpltuq with a {1to8}
; embedded-broadcast memory operand.
19155define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19156; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19157; VLX:       # %bb.0: # %entry
19158; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19159; VLX-NEXT:    kmovd %k0, %eax
19160; VLX-NEXT:    vzeroupper
19161; VLX-NEXT:    retq
19162;
19163; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19164; NoVLX:       # %bb.0: # %entry
19165; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19166; NoVLX-NEXT:    kmovw %k0, %eax
19167; NoVLX-NEXT:    vzeroupper
19168; NoVLX-NEXT:    retq
19169entry:
19170  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
19171  %load = load i64, ptr %__b
19172  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19173  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19174  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19175  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19176  %4 = bitcast <32 x i1> %3 to i32
19177  ret i32 %4
19178}
19179
; Masked unsigned less-than (icmp ult) of %__a against a single i64 loaded
; from %__b and splatted to all 8 lanes. All 8 bits of %__u are ANDed with the
; compare result (mask is the first AND operand here), which is widened to 32
; lanes and returned as an i32.
; Both RUN configurations expect a write-masked zmm vpcmpltuq with a {1to8}
; embedded-broadcast memory operand.
19180define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19181; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19182; VLX:       # %bb.0: # %entry
19183; VLX-NEXT:    kmovd %edi, %k1
19184; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19185; VLX-NEXT:    kmovd %k0, %eax
19186; VLX-NEXT:    vzeroupper
19187; VLX-NEXT:    retq
19188;
19189; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19190; NoVLX:       # %bb.0: # %entry
19191; NoVLX-NEXT:    kmovw %edi, %k1
19192; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19193; NoVLX-NEXT:    kmovw %k0, %eax
19194; NoVLX-NEXT:    vzeroupper
19195; NoVLX-NEXT:    retq
19196entry:
19197  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Scalar load, insert into lane 0, then an all-zero shuffle mask copies
  ; lane 0 into every lane (splat).
19198  %load = load i64, ptr %__b
19199  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19200  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19201  %2 = icmp ult <8 x i64> %0, %1
  ; The i8 mask already has one bit per compare lane, so no extract is needed.
19202  %3 = bitcast i8 %__u to <8 x i1>
19203  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are zero.
19204  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19205  %6 = bitcast <32 x i1> %5 to i32
19206  ret i32 %6
19207}
19208
19209
; Unmasked register-register unsigned less-than on <8 x i64>, result widened
; to a 64-bit mask. icmp ult yields <8 x i1>; the shuffle pads it to <64 x i1>
; with lanes of zeroinitializer (indices 8-15), so bits 8-63 are zero, and the
; vector is bitcast to the returned i64.
; VLX checks expect kmovq into %rax; NoVLX checks expect kmovw into %eax.
19210define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19211; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19212; VLX:       # %bb.0: # %entry
19213; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19214; VLX-NEXT:    kmovq %k0, %rax
19215; VLX-NEXT:    vzeroupper
19216; VLX-NEXT:    retq
19217;
19218; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19219; NoVLX:       # %bb.0: # %entry
19220; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19221; NoVLX-NEXT:    kmovw %k0, %eax
19222; NoVLX-NEXT:    vzeroupper
19223; NoVLX-NEXT:    retq
19224entry:
19225  %0 = bitcast <8 x i64> %__a to <8 x i64>
19226  %1 = bitcast <8 x i64> %__b to <8 x i64>
19227  %2 = icmp ult <8 x i64> %0, %1
19228  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19229  %4 = bitcast <64 x i1> %3 to i64
19230  ret i64 %4
19231}
19232
; Unmasked unsigned less-than on <8 x i64> with the second operand loaded as a
; full <8 x i64> vector from %__b. The <8 x i1> result is padded to <64 x i1>
; with zeroinitializer lanes (indices 8-15) and returned as i64.
; Both check prefixes expect the load folded into vpcmpltuq as (%rdi).
19233define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19234; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19235; VLX:       # %bb.0: # %entry
19236; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19237; VLX-NEXT:    kmovq %k0, %rax
19238; VLX-NEXT:    vzeroupper
19239; VLX-NEXT:    retq
19240;
19241; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19242; NoVLX:       # %bb.0: # %entry
19243; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19244; NoVLX-NEXT:    kmovw %k0, %eax
19245; NoVLX-NEXT:    vzeroupper
19246; NoVLX-NEXT:    retq
19247entry:
19248  %0 = bitcast <8 x i64> %__a to <8 x i64>
19249  %load = load <8 x i64>, ptr %__b
19250  %1 = bitcast <8 x i64> %load to <8 x i64>
19251  %2 = icmp ult <8 x i64> %0, %1
19252  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19253  %4 = bitcast <64 x i1> %3 to i64
19254  ret i64 %4
19255}
19256
; Masked register-register unsigned less-than on <8 x i64>, result widened to
; a 64-bit mask. The icmp ult result is ANDed with the i8 mask %__u (bitcast to
; <8 x i1>), padded to <64 x i1> with zeroinitializer lanes (indices 8-15) and
; returned as i64.
; Both check prefixes expect %__u moved into %k1 and used as the write mask of
; vpcmpltuq.
19257define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19258; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19259; VLX:       # %bb.0: # %entry
19260; VLX-NEXT:    kmovd %edi, %k1
19261; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19262; VLX-NEXT:    kmovq %k0, %rax
19263; VLX-NEXT:    vzeroupper
19264; VLX-NEXT:    retq
19265;
19266; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19267; NoVLX:       # %bb.0: # %entry
19268; NoVLX-NEXT:    kmovw %edi, %k1
19269; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19270; NoVLX-NEXT:    kmovw %k0, %eax
19271; NoVLX-NEXT:    vzeroupper
19272; NoVLX-NEXT:    retq
19273entry:
19274  %0 = bitcast <8 x i64> %__a to <8 x i64>
19275  %1 = bitcast <8 x i64> %__b to <8 x i64>
19276  %2 = icmp ult <8 x i64> %0, %1
19277  %3 = bitcast i8 %__u to <8 x i1>
19278  %4 = and <8 x i1> %2, %3
19279  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19280  %6 = bitcast <64 x i1> %5 to i64
19281  ret i64 %6
19282}
19283
; Masked unsigned less-than on <8 x i64> with the second operand loaded as a
; full vector from %__b. The icmp ult result is ANDed with the i8 mask %__u,
; padded to <64 x i1> with zeroinitializer lanes (indices 8-15) and returned
; as i64.
; Both check prefixes expect a masked vpcmpltuq with the load folded as (%rsi).
19284define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19285; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19286; VLX:       # %bb.0: # %entry
19287; VLX-NEXT:    kmovd %edi, %k1
19288; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19289; VLX-NEXT:    kmovq %k0, %rax
19290; VLX-NEXT:    vzeroupper
19291; VLX-NEXT:    retq
19292;
19293; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19294; NoVLX:       # %bb.0: # %entry
19295; NoVLX-NEXT:    kmovw %edi, %k1
19296; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19297; NoVLX-NEXT:    kmovw %k0, %eax
19298; NoVLX-NEXT:    vzeroupper
19299; NoVLX-NEXT:    retq
19300entry:
19301  %0 = bitcast <8 x i64> %__a to <8 x i64>
19302  %load = load <8 x i64>, ptr %__b
19303  %1 = bitcast <8 x i64> %load to <8 x i64>
19304  %2 = icmp ult <8 x i64> %0, %1
19305  %3 = bitcast i8 %__u to <8 x i1>
19306  %4 = and <8 x i1> %2, %3
19307  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19308  %6 = bitcast <64 x i1> %5 to i64
19309  ret i64 %6
19310}
19311
19312
; Unmasked, broadcast-from-memory unsigned less-than on <8 x i64>.
; A single i64 is loaded from %__b and splatted to all 8 lanes (insertelement
; into lane 0, then an all-zero-index shuffle); %__a is compared against it
; with icmp ult. The <8 x i1> result is padded to <64 x i1> with
; zeroinitializer lanes (indices 8-15) and returned as i64.
; Both check prefixes expect the splat folded as a {1to8} broadcast operand.
19313define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19314; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19315; VLX:       # %bb.0: # %entry
19316; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19317; VLX-NEXT:    kmovq %k0, %rax
19318; VLX-NEXT:    vzeroupper
19319; VLX-NEXT:    retq
19320;
19321; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19322; NoVLX:       # %bb.0: # %entry
19323; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19324; NoVLX-NEXT:    kmovw %k0, %eax
19325; NoVLX-NEXT:    vzeroupper
19326; NoVLX-NEXT:    retq
19327entry:
19328  %0 = bitcast <8 x i64> %__a to <8 x i64>
19329  %load = load i64, ptr %__b
19330  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19331  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19332  %2 = icmp ult <8 x i64> %0, %1
19333  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19334  %4 = bitcast <64 x i1> %3 to i64
19335  ret i64 %4
19336}
19337
; Masked, broadcast-from-memory unsigned less-than on <8 x i64>, result widened
; to a 64-bit mask. The i64 at %__b is splatted to 8 lanes, %__a is compared
; against it with icmp ult, and the result is ANDed with the i8 mask %__u.
; The <8 x i1> value is padded to <64 x i1> with zeroinitializer lanes
; (indices 8-15) and returned as i64.
; Both check prefixes expect a masked vpcmpltuq with a folded {1to8} broadcast.
19338define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19339; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19340; VLX:       # %bb.0: # %entry
19341; VLX-NEXT:    kmovd %edi, %k1
19342; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19343; VLX-NEXT:    kmovq %k0, %rax
19344; VLX-NEXT:    vzeroupper
19345; VLX-NEXT:    retq
19346;
19347; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19348; NoVLX:       # %bb.0: # %entry
19349; NoVLX-NEXT:    kmovw %edi, %k1
19350; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19351; NoVLX-NEXT:    kmovw %k0, %eax
19352; NoVLX-NEXT:    vzeroupper
19353; NoVLX-NEXT:    retq
19354entry:
19355  %0 = bitcast <8 x i64> %__a to <8 x i64>
19356  %load = load i64, ptr %__b
19357  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19358  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19359  %2 = icmp ult <8 x i64> %0, %1
19360  %3 = bitcast i8 %__u to <8 x i1>
19361  %4 = and <8 x i1> %3, %2
19362  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19363  %6 = bitcast <64 x i1> %5 to i64
19364  ret i64 %6
19365}
19366
19367
; Declaration of the x86 AVX-512 masked compare intrinsic on <16 x float>
; operands, returning <16 x i1>. No function visible in this part of the file
; calls it; presumably tests further down do (not verified here).
19368declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
; Unmasked register-register ordered-equal compare on <4 x float>, result
; widened to an 8-bit mask. Both <2 x i64> arguments are bitcast to <4 x float>
; and compared with fcmp oeq; the <4 x i1> result is padded to <8 x i1> with
; zeroinitializer lanes (indices 4-7) and returned as i8.
; VLX checks expect a 128-bit vcmpeqps straight into %k0. NoVLX checks expect
; the compare done on zmm registers, followed by kshiftlw/kshiftrw by 12 to
; clear the 12 mask bits above the 4 valid lanes.
19369define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19370; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19371; VLX:       # %bb.0: # %entry
19372; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19373; VLX-NEXT:    kmovd %k0, %eax
19374; VLX-NEXT:    # kill: def $al killed $al killed $eax
19375; VLX-NEXT:    retq
19376;
19377; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19378; NoVLX:       # %bb.0: # %entry
19379; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19380; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19381; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19382; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19383; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19384; NoVLX-NEXT:    kmovw %k0, %eax
19385; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19386; NoVLX-NEXT:    vzeroupper
19387; NoVLX-NEXT:    retq
19388entry:
19389  %0 = bitcast <2 x i64> %__a to <4 x float>
19390  %1 = bitcast <2 x i64> %__b to <4 x float>
19391  %2 = fcmp oeq <4 x float> %0, %1
19392  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19393  %4 = bitcast <8 x i1> %3 to i8
19394  ret i8 %4
19395}
19396
; Unmasked ordered-equal compare on <4 x float> with the second operand loaded
; as <2 x i64> from %__b and bitcast to <4 x float>. The <4 x i1> result is
; padded to <8 x i1> with zeroinitializer lanes (indices 4-7) and returned
; as i8.
; VLX checks expect the load folded into vcmpeqps as (%rdi). NoVLX checks
; expect a separate 128-bit vmovaps load, a zmm compare, and a kshiftlw/kshiftrw
; pair by 12 to clear the mask bits above lane 3.
19397define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19398; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19399; VLX:       # %bb.0: # %entry
19400; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19401; VLX-NEXT:    kmovd %k0, %eax
19402; VLX-NEXT:    # kill: def $al killed $al killed $eax
19403; VLX-NEXT:    retq
19404;
19405; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19406; NoVLX:       # %bb.0: # %entry
19407; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19408; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19409; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19410; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19411; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19412; NoVLX-NEXT:    kmovw %k0, %eax
19413; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19414; NoVLX-NEXT:    vzeroupper
19415; NoVLX-NEXT:    retq
19416entry:
19417  %0 = bitcast <2 x i64> %__a to <4 x float>
19418  %load = load <2 x i64>, ptr %__b
19419  %1 = bitcast <2 x i64> %load to <4 x float>
19420  %2 = fcmp oeq <4 x float> %0, %1
19421  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19422  %4 = bitcast <8 x i1> %3 to i8
19423  ret i8 %4
19424}
19425
; Unmasked, broadcast-from-memory ordered-equal compare on <4 x float>.
; One float is loaded from %__b and splatted to all 4 lanes; %__a (bitcast to
; <4 x float>) is compared against it with fcmp oeq. The <4 x i1> result is
; padded to <8 x i1> with zeroinitializer lanes (indices 4-7) and returned
; as i8.
; VLX checks expect a {1to4} broadcast operand on a 128-bit vcmpeqps. NoVLX
; checks expect a {1to16} broadcast on a zmm compare, then kshiftlw/kshiftrw
; by 12 to clear the mask bits above lane 3.
19426define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19427; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19428; VLX:       # %bb.0: # %entry
19429; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
19430; VLX-NEXT:    kmovd %k0, %eax
19431; VLX-NEXT:    # kill: def $al killed $al killed $eax
19432; VLX-NEXT:    retq
19433;
19434; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19435; NoVLX:       # %bb.0: # %entry
19436; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19437; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
19438; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19439; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19440; NoVLX-NEXT:    kmovw %k0, %eax
19441; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19442; NoVLX-NEXT:    vzeroupper
19443; NoVLX-NEXT:    retq
19444entry:
19445  %0 = bitcast <2 x i64> %__a to <4 x float>
19446  %load = load float, ptr %__b
19447  %vec = insertelement <4 x float> undef, float %load, i32 0
19448  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19449  %2 = fcmp oeq <4 x float> %0, %1
19450  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19451  %4 = bitcast <8 x i1> %3 to i8
19452  ret i8 %4
19453}
19454
; Masked register-register ordered-equal compare on <4 x float>, result widened
; to an 8-bit mask. The fcmp oeq result is ANDed with the i4 mask %__u (bitcast
; to <4 x i1>), padded to <8 x i1> with zeroinitializer lanes (indices 4-7) and
; returned as i8.
; VLX checks expect %__u in %k1 as the write mask of a 128-bit vcmpeqps. NoVLX
; checks expect a masked zmm compare followed by kshiftlw/kshiftrw by 12 to
; clear the mask bits above lane 3.
19455define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19456; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19457; VLX:       # %bb.0: # %entry
19458; VLX-NEXT:    kmovd %edi, %k1
19459; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19460; VLX-NEXT:    kmovd %k0, %eax
19461; VLX-NEXT:    # kill: def $al killed $al killed $eax
19462; VLX-NEXT:    retq
19463;
19464; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19465; NoVLX:       # %bb.0: # %entry
19466; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19467; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19468; NoVLX-NEXT:    kmovw %edi, %k1
19469; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19470; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19471; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19472; NoVLX-NEXT:    kmovw %k0, %eax
19473; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19474; NoVLX-NEXT:    vzeroupper
19475; NoVLX-NEXT:    retq
19476entry:
19477  %0 = bitcast <2 x i64> %__a to <4 x float>
19478  %1 = bitcast <2 x i64> %__b to <4 x float>
19479  %2 = fcmp oeq <4 x float> %0, %1
19480  %3 = bitcast i4 %__u to <4 x i1>
19481  %4 = and <4 x i1> %2, %3
19482  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19483  %6 = bitcast <8 x i1> %5 to i8
19484  ret i8 %6
19485}
19486
; Masked ordered-equal compare on <4 x float> with the second operand loaded as
; <2 x i64> from %__b and bitcast to <4 x float>. The fcmp oeq result is ANDed
; with the i4 mask %__u, padded to <8 x i1> with zeroinitializer lanes
; (indices 4-7) and returned as i8.
; VLX checks expect a masked vcmpeqps with the load folded as (%rsi). NoVLX
; checks expect a separate vmovaps load, a masked zmm compare, and
; kshiftlw/kshiftrw by 12 to clear the mask bits above lane 3.
19487define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19488; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19489; VLX:       # %bb.0: # %entry
19490; VLX-NEXT:    kmovd %edi, %k1
19491; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19492; VLX-NEXT:    kmovd %k0, %eax
19493; VLX-NEXT:    # kill: def $al killed $al killed $eax
19494; VLX-NEXT:    retq
19495;
19496; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19497; NoVLX:       # %bb.0: # %entry
19498; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19499; NoVLX-NEXT:    kmovw %edi, %k1
19500; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
19501; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19502; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19503; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19504; NoVLX-NEXT:    kmovw %k0, %eax
19505; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19506; NoVLX-NEXT:    vzeroupper
19507; NoVLX-NEXT:    retq
19508entry:
19509  %0 = bitcast <2 x i64> %__a to <4 x float>
19510  %load = load <2 x i64>, ptr %__b
19511  %1 = bitcast <2 x i64> %load to <4 x float>
19512  %2 = fcmp oeq <4 x float> %0, %1
19513  %3 = bitcast i4 %__u to <4 x i1>
19514  %4 = and <4 x i1> %2, %3
19515  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19516  %6 = bitcast <8 x i1> %5 to i8
19517  ret i8 %6
19518}
19519
; Masked, broadcast-from-memory ordered-equal compare on <4 x float>.
; One float is loaded from %__b and splatted to 4 lanes; %__a (bitcast to
; <4 x float>) is compared against it with fcmp oeq and the result is ANDed
; with the i4 mask %__u. The <4 x i1> value is padded to <8 x i1> with
; zeroinitializer lanes (indices 4-7) and returned as i8.
; VLX checks expect a masked 128-bit vcmpeqps with a {1to4} broadcast. NoVLX
; checks expect a masked zmm compare with a {1to16} broadcast, then
; kshiftlw/kshiftrw by 12 to clear the mask bits above lane 3.
19520define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19521; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19522; VLX:       # %bb.0: # %entry
19523; VLX-NEXT:    kmovd %edi, %k1
19524; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19525; VLX-NEXT:    kmovd %k0, %eax
19526; VLX-NEXT:    # kill: def $al killed $al killed $eax
19527; VLX-NEXT:    retq
19528;
19529; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19530; NoVLX:       # %bb.0: # %entry
19531; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19532; NoVLX-NEXT:    kmovw %edi, %k1
19533; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19534; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19535; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19536; NoVLX-NEXT:    kmovw %k0, %eax
19537; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19538; NoVLX-NEXT:    vzeroupper
19539; NoVLX-NEXT:    retq
19540entry:
19541  %0 = bitcast <2 x i64> %__a to <4 x float>
19542  %load = load float, ptr %__b
19543  %vec = insertelement <4 x float> undef, float %load, i32 0
19544  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19545  %2 = fcmp oeq <4 x float> %0, %1
19546  %3 = bitcast i4 %__u to <4 x i1>
19547  %4 = and <4 x i1> %2, %3
19548  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19549  %6 = bitcast <8 x i1> %5 to i8
19550  ret i8 %6
19551}
19552
19553
19554
; Unmasked register-register ordered-equal compare on <4 x float>, result
; widened to a 16-bit mask. The <4 x i1> fcmp oeq result is padded to
; <16 x i1> with zeroinitializer lanes (indices 4-7) and returned as i16.
; VLX checks expect a 128-bit vcmpeqps into %k0. NoVLX checks expect a zmm
; compare followed by kshiftlw/kshiftrw by 12 to clear the mask bits above
; lane 3.
19555define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19556; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19557; VLX:       # %bb.0: # %entry
19558; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19559; VLX-NEXT:    kmovd %k0, %eax
19560; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19561; VLX-NEXT:    retq
19562;
19563; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19564; NoVLX:       # %bb.0: # %entry
19565; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19566; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19567; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19568; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19569; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19570; NoVLX-NEXT:    kmovw %k0, %eax
19571; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19572; NoVLX-NEXT:    vzeroupper
19573; NoVLX-NEXT:    retq
19574entry:
19575  %0 = bitcast <2 x i64> %__a to <4 x float>
19576  %1 = bitcast <2 x i64> %__b to <4 x float>
19577  %2 = fcmp oeq <4 x float> %0, %1
19578  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19579  %4 = bitcast <16 x i1> %3 to i16
19580  ret i16 %4
19581}
19582
; Unmasked ordered-equal compare on <4 x float> with the second operand loaded
; as <2 x i64> from %__b and bitcast to <4 x float>. The <4 x i1> result is
; padded to <16 x i1> with zeroinitializer lanes (indices 4-7) and returned
; as i16.
; VLX checks expect the load folded into vcmpeqps as (%rdi). NoVLX checks
; expect a separate vmovaps load, a zmm compare, and kshiftlw/kshiftrw by 12
; to clear the mask bits above lane 3.
19583define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19584; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19585; VLX:       # %bb.0: # %entry
19586; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19587; VLX-NEXT:    kmovd %k0, %eax
19588; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19589; VLX-NEXT:    retq
19590;
19591; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19592; NoVLX:       # %bb.0: # %entry
19593; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19594; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19595; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19596; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19597; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19598; NoVLX-NEXT:    kmovw %k0, %eax
19599; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19600; NoVLX-NEXT:    vzeroupper
19601; NoVLX-NEXT:    retq
19602entry:
19603  %0 = bitcast <2 x i64> %__a to <4 x float>
19604  %load = load <2 x i64>, ptr %__b
19605  %1 = bitcast <2 x i64> %load to <4 x float>
19606  %2 = fcmp oeq <4 x float> %0, %1
19607  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19608  %4 = bitcast <16 x i1> %3 to i16
19609  ret i16 %4
19610}
19611
; Unmasked, broadcast-from-memory ordered-equal compare on <4 x float>, result
; widened to a 16-bit mask. One float is loaded from %__b and splatted to 4
; lanes before the fcmp oeq; the <4 x i1> result is padded to <16 x i1> with
; zeroinitializer lanes (indices 4-7) and returned as i16.
; VLX checks expect a {1to4} broadcast on a 128-bit vcmpeqps. NoVLX checks
; expect a {1to16} broadcast on a zmm compare, then kshiftlw/kshiftrw by 12 to
; clear the mask bits above lane 3.
19612define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19613; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19614; VLX:       # %bb.0: # %entry
19615; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
19616; VLX-NEXT:    kmovd %k0, %eax
19617; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19618; VLX-NEXT:    retq
19619;
19620; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19621; NoVLX:       # %bb.0: # %entry
19622; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19623; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
19624; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19625; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19626; NoVLX-NEXT:    kmovw %k0, %eax
19627; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19628; NoVLX-NEXT:    vzeroupper
19629; NoVLX-NEXT:    retq
19630entry:
19631  %0 = bitcast <2 x i64> %__a to <4 x float>
19632  %load = load float, ptr %__b
19633  %vec = insertelement <4 x float> undef, float %load, i32 0
19634  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19635  %2 = fcmp oeq <4 x float> %0, %1
19636  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19637  %4 = bitcast <16 x i1> %3 to i16
19638  ret i16 %4
19639}
19640
; Masked register-register ordered-equal compare on <4 x float>, result widened
; to a 16-bit mask. The fcmp oeq result is ANDed with the i4 mask %__u (bitcast
; to <4 x i1>), padded to <16 x i1> with zeroinitializer lanes (indices 4-7)
; and returned as i16.
; VLX checks expect %__u in %k1 as the write mask of a 128-bit vcmpeqps. NoVLX
; checks expect a masked zmm compare followed by kshiftlw/kshiftrw by 12 to
; clear the mask bits above lane 3.
19641define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19642; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19643; VLX:       # %bb.0: # %entry
19644; VLX-NEXT:    kmovd %edi, %k1
19645; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19646; VLX-NEXT:    kmovd %k0, %eax
19647; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19648; VLX-NEXT:    retq
19649;
19650; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19651; NoVLX:       # %bb.0: # %entry
19652; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19653; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19654; NoVLX-NEXT:    kmovw %edi, %k1
19655; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19656; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19657; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19658; NoVLX-NEXT:    kmovw %k0, %eax
19659; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19660; NoVLX-NEXT:    vzeroupper
19661; NoVLX-NEXT:    retq
19662entry:
19663  %0 = bitcast <2 x i64> %__a to <4 x float>
19664  %1 = bitcast <2 x i64> %__b to <4 x float>
19665  %2 = fcmp oeq <4 x float> %0, %1
19666  %3 = bitcast i4 %__u to <4 x i1>
19667  %4 = and <4 x i1> %2, %3
19668  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19669  %6 = bitcast <16 x i1> %5 to i16
19670  ret i16 %6
19671}
19672
; Masked ordered-equal compare on <4 x float> with the second operand loaded as
; <2 x i64> from %__b and bitcast to <4 x float>. The fcmp oeq result is ANDed
; with the i4 mask %__u, padded to <16 x i1> with zeroinitializer lanes
; (indices 4-7) and returned as i16.
; VLX checks expect a masked vcmpeqps with the load folded as (%rsi). NoVLX
; checks expect a separate vmovaps load, a masked zmm compare, and
; kshiftlw/kshiftrw by 12 to clear the mask bits above lane 3.
19673define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19674; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19675; VLX:       # %bb.0: # %entry
19676; VLX-NEXT:    kmovd %edi, %k1
19677; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19678; VLX-NEXT:    kmovd %k0, %eax
19679; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19680; VLX-NEXT:    retq
19681;
19682; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19683; NoVLX:       # %bb.0: # %entry
19684; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19685; NoVLX-NEXT:    kmovw %edi, %k1
19686; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
19687; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19688; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19689; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19690; NoVLX-NEXT:    kmovw %k0, %eax
19691; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19692; NoVLX-NEXT:    vzeroupper
19693; NoVLX-NEXT:    retq
19694entry:
19695  %0 = bitcast <2 x i64> %__a to <4 x float>
19696  %load = load <2 x i64>, ptr %__b
19697  %1 = bitcast <2 x i64> %load to <4 x float>
19698  %2 = fcmp oeq <4 x float> %0, %1
19699  %3 = bitcast i4 %__u to <4 x i1>
19700  %4 = and <4 x i1> %2, %3
19701  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19702  %6 = bitcast <16 x i1> %5 to i16
19703  ret i16 %6
19704}
19705
; Masked, broadcast-from-memory ordered-equal compare on <4 x float>, result
; widened to a 16-bit mask. One float is loaded from %__b and splatted to 4
; lanes; the fcmp oeq result against %__a is ANDed with the i4 mask %__u,
; padded to <16 x i1> with zeroinitializer lanes (indices 4-7) and returned
; as i16.
; VLX checks expect a masked 128-bit vcmpeqps with a {1to4} broadcast. NoVLX
; checks expect a masked zmm compare with a {1to16} broadcast, then
; kshiftlw/kshiftrw by 12 to clear the mask bits above lane 3.
19706define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19707; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19708; VLX:       # %bb.0: # %entry
19709; VLX-NEXT:    kmovd %edi, %k1
19710; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19711; VLX-NEXT:    kmovd %k0, %eax
19712; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19713; VLX-NEXT:    retq
19714;
19715; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19716; NoVLX:       # %bb.0: # %entry
19717; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19718; NoVLX-NEXT:    kmovw %edi, %k1
19719; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19720; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19721; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19722; NoVLX-NEXT:    kmovw %k0, %eax
19723; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19724; NoVLX-NEXT:    vzeroupper
19725; NoVLX-NEXT:    retq
19726entry:
19727  %0 = bitcast <2 x i64> %__a to <4 x float>
19728  %load = load float, ptr %__b
19729  %vec = insertelement <4 x float> undef, float %load, i32 0
19730  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19731  %2 = fcmp oeq <4 x float> %0, %1
19732  %3 = bitcast i4 %__u to <4 x i1>
19733  %4 = and <4 x i1> %2, %3
19734  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19735  %6 = bitcast <16 x i1> %5 to i16
19736  ret i16 %6
19737}
19738
19739
19740
; Unmasked register-register ordered-equal compare on <4 x float>, result
; widened to a 32-bit mask. The <4 x i1> fcmp oeq result is padded to
; <32 x i1> with zeroinitializer lanes (indices 4-7) and returned as i32.
; VLX checks expect a 128-bit vcmpeqps and a kmovd of %k0 into %eax. NoVLX
; checks expect a zmm compare followed by kshiftlw/kshiftrw by 12 to clear the
; mask bits above lane 3, then kmovw into %eax.
19741define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19742; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19743; VLX:       # %bb.0: # %entry
19744; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19745; VLX-NEXT:    kmovd %k0, %eax
19746; VLX-NEXT:    retq
19747;
19748; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19749; NoVLX:       # %bb.0: # %entry
19750; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19751; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19752; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19753; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19754; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19755; NoVLX-NEXT:    kmovw %k0, %eax
19756; NoVLX-NEXT:    vzeroupper
19757; NoVLX-NEXT:    retq
19758entry:
19759  %0 = bitcast <2 x i64> %__a to <4 x float>
19760  %1 = bitcast <2 x i64> %__b to <4 x float>
19761  %2 = fcmp oeq <4 x float> %0, %1
19762  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19763  %4 = bitcast <32 x i1> %3 to i32
19764  ret i32 %4
19765}
19766
; Unmasked ordered-equal compare on <4 x float> with the second operand loaded
; as <2 x i64> from %__b and bitcast to <4 x float>. The <4 x i1> result is
; padded to <32 x i1> with zeroinitializer lanes (indices 4-7) and returned
; as i32.
; VLX checks expect the load folded into vcmpeqps as (%rdi). NoVLX checks
; expect a separate vmovaps load, a zmm compare, and kshiftlw/kshiftrw by 12
; to clear the mask bits above lane 3.
19767define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19768; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19769; VLX:       # %bb.0: # %entry
19770; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19771; VLX-NEXT:    kmovd %k0, %eax
19772; VLX-NEXT:    retq
19773;
19774; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19775; NoVLX:       # %bb.0: # %entry
19776; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19777; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19778; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19779; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19780; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19781; NoVLX-NEXT:    kmovw %k0, %eax
19782; NoVLX-NEXT:    vzeroupper
19783; NoVLX-NEXT:    retq
19784entry:
19785  %0 = bitcast <2 x i64> %__a to <4 x float>
19786  %load = load <2 x i64>, ptr %__b
19787  %1 = bitcast <2 x i64> %load to <4 x float>
19788  %2 = fcmp oeq <4 x float> %0, %1
19789  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19790  %4 = bitcast <32 x i1> %3 to i32
19791  ret i32 %4
19792}
19793
; Unmasked, broadcast-from-memory ordered-equal compare on <4 x float>, result
; widened to a 32-bit mask. One float is loaded from %__b and splatted to 4
; lanes before the fcmp oeq; the <4 x i1> result is padded to <32 x i1> with
; zeroinitializer lanes (indices 4-7) and returned as i32.
; VLX checks expect a {1to4} broadcast on a 128-bit vcmpeqps. NoVLX checks
; expect a {1to16} broadcast on a zmm compare, then kshiftlw/kshiftrw by 12 to
; clear the mask bits above lane 3.
19794define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19795; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19796; VLX:       # %bb.0: # %entry
19797; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
19798; VLX-NEXT:    kmovd %k0, %eax
19799; VLX-NEXT:    retq
19800;
19801; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19802; NoVLX:       # %bb.0: # %entry
19803; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19804; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
19805; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19806; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19807; NoVLX-NEXT:    kmovw %k0, %eax
19808; NoVLX-NEXT:    vzeroupper
19809; NoVLX-NEXT:    retq
19810entry:
19811  %0 = bitcast <2 x i64> %__a to <4 x float>
19812  %load = load float, ptr %__b
19813  %vec = insertelement <4 x float> undef, float %load, i32 0
19814  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19815  %2 = fcmp oeq <4 x float> %0, %1
19816  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19817  %4 = bitcast <32 x i1> %3 to i32
19818  ret i32 %4
19819}
19820
; Masked register-register ordered-equal compare on <4 x float>, result widened
; to a 32-bit mask. The fcmp oeq result is ANDed with the i4 mask %__u (bitcast
; to <4 x i1>), padded to <32 x i1> with zeroinitializer lanes (indices 4-7)
; and returned as i32.
; VLX checks expect %__u in %k1 as the write mask of a 128-bit vcmpeqps. NoVLX
; checks expect a masked zmm compare followed by kshiftlw/kshiftrw by 12 to
; clear the mask bits above lane 3.
19821define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19822; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19823; VLX:       # %bb.0: # %entry
19824; VLX-NEXT:    kmovd %edi, %k1
19825; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19826; VLX-NEXT:    kmovd %k0, %eax
19827; VLX-NEXT:    retq
19828;
19829; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19830; NoVLX:       # %bb.0: # %entry
19831; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19832; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19833; NoVLX-NEXT:    kmovw %edi, %k1
19834; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19835; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19836; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19837; NoVLX-NEXT:    kmovw %k0, %eax
19838; NoVLX-NEXT:    vzeroupper
19839; NoVLX-NEXT:    retq
19840entry:
19841  %0 = bitcast <2 x i64> %__a to <4 x float>
19842  %1 = bitcast <2 x i64> %__b to <4 x float>
19843  %2 = fcmp oeq <4 x float> %0, %1
19844  %3 = bitcast i4 %__u to <4 x i1>
19845  %4 = and <4 x i1> %2, %3
19846  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19847  %6 = bitcast <32 x i1> %5 to i32
19848  ret i32 %6
19849}
19850
; Masked ordered-equal compare on <4 x float> with the second operand loaded as
; <2 x i64> from %__b and bitcast to <4 x float>. The fcmp oeq result is ANDed
; with the i4 mask %__u, padded to <32 x i1> with zeroinitializer lanes
; (indices 4-7) and returned as i32.
; VLX checks expect a masked vcmpeqps with the load folded as (%rsi). NoVLX
; checks expect a separate vmovaps load, a masked zmm compare, and
; kshiftlw/kshiftrw by 12 to clear the mask bits above lane 3.
19851define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19852; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19853; VLX:       # %bb.0: # %entry
19854; VLX-NEXT:    kmovd %edi, %k1
19855; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19856; VLX-NEXT:    kmovd %k0, %eax
19857; VLX-NEXT:    retq
19858;
19859; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19860; NoVLX:       # %bb.0: # %entry
19861; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19862; NoVLX-NEXT:    kmovw %edi, %k1
19863; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
19864; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19865; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19866; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19867; NoVLX-NEXT:    kmovw %k0, %eax
19868; NoVLX-NEXT:    vzeroupper
19869; NoVLX-NEXT:    retq
19870entry:
19871  %0 = bitcast <2 x i64> %__a to <4 x float>
19872  %load = load <2 x i64>, ptr %__b
19873  %1 = bitcast <2 x i64> %load to <4 x float>
19874  %2 = fcmp oeq <4 x float> %0, %1
19875  %3 = bitcast i4 %__u to <4 x i1>
19876  %4 = and <4 x i1> %2, %3
19877  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19878  %6 = bitcast <32 x i1> %5 to i32
19879  ret i32 %6
19880}
19881
; Broadcast form of the masked v4 -> i32 compare: one float is loaded from %__b
; and splatted to all four lanes before the oeq compare. The VLX checks expect an
; embedded {1to4} broadcast operand on the k1-masked xmm vcmpeqps; the NoVLX
; checks expect a {1to16} broadcast on a zmm compare followed by the
; kshiftlw/kshiftrw $12 pair that keeps only mask bits 0-3.
19882define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19883; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19884; VLX:       # %bb.0: # %entry
19885; VLX-NEXT:    kmovd %edi, %k1
19886; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19887; VLX-NEXT:    kmovd %k0, %eax
19888; VLX-NEXT:    retq
19889;
19890; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19891; NoVLX:       # %bb.0: # %entry
19892; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19893; NoVLX-NEXT:    kmovw %edi, %k1
19894; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19895; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19896; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19897; NoVLX-NEXT:    kmovw %k0, %eax
19898; NoVLX-NEXT:    vzeroupper
19899; NoVLX-NEXT:    retq
19900entry:
19901  %0 = bitcast <2 x i64> %__a to <4 x float>
19902  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
19903  %vec = insertelement <4 x float> undef, float %load, i32 0
19904  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19905  %2 = fcmp oeq <4 x float> %0, %1
19906  %3 = bitcast i4 %__u to <4 x i1>
19907  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 take lanes of %4; indices 4-7 select lanes of the zeroinitializer operand.
19908  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19909  %6 = bitcast <32 x i1> %5 to i32
19910  ret i32 %6
19911}
19912
19913
19914
; Unmasked ordered-equal compare of two <4 x float> vectors (bitcast from the
; <2 x i64> arguments), widened to a 64-lane mask and returned as an i64 whose
; upper 60 bits are zero. The VLX checks expect an xmm vcmpeqps into k0 read
; back with kmovq; the NoVLX checks expect a zmm compare, kshiftlw/kshiftrw $12
; to keep only mask bits 0-3, and a kmovw into %eax.
19915define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19916; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19917; VLX:       # %bb.0: # %entry
19918; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19919; VLX-NEXT:    kmovq %k0, %rax
19920; VLX-NEXT:    retq
19921;
19922; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19923; NoVLX:       # %bb.0: # %entry
19924; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19925; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19926; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19927; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19928; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19929; NoVLX-NEXT:    kmovw %k0, %eax
19930; NoVLX-NEXT:    vzeroupper
19931; NoVLX-NEXT:    retq
19932entry:
19933  %0 = bitcast <2 x i64> %__a to <4 x float>
19934  %1 = bitcast <2 x i64> %__b to <4 x float>
19935  %2 = fcmp oeq <4 x float> %0, %1
  ; Indices 0-3 take lanes of %2; indices 4-7 select lanes of the zeroinitializer operand.
19936  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19937  %4 = bitcast <64 x i1> %3 to i64
19938  ret i64 %4
19939}
19940
; Memory-operand form of the unmasked v4 -> i64 compare: the right-hand side is
; loaded from %__b as <2 x i64> and bitcast to <4 x float>. The VLX checks expect
; the load folded into vcmpeqps as (%rdi) and a kmovq result; the NoVLX checks
; expect a separate vmovaps into xmm1, a zmm compare, and kshiftlw/kshiftrw $12
; keeping only mask bits 0-3.
19941define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19942; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19943; VLX:       # %bb.0: # %entry
19944; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19945; VLX-NEXT:    kmovq %k0, %rax
19946; VLX-NEXT:    retq
19947;
19948; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19949; NoVLX:       # %bb.0: # %entry
19950; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19951; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19952; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19953; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19954; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19955; NoVLX-NEXT:    kmovw %k0, %eax
19956; NoVLX-NEXT:    vzeroupper
19957; NoVLX-NEXT:    retq
19958entry:
19959  %0 = bitcast <2 x i64> %__a to <4 x float>
19960  %load = load <2 x i64>, ptr %__b
19961  %1 = bitcast <2 x i64> %load to <4 x float>
19962  %2 = fcmp oeq <4 x float> %0, %1
  ; Indices 0-3 take lanes of %2; indices 4-7 select lanes of the zeroinitializer operand.
19963  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19964  %4 = bitcast <64 x i1> %3 to i64
19965  ret i64 %4
19966}
19967
; Broadcast form of the unmasked v4 -> i64 compare: one float is loaded from
; %__b and splatted to all four lanes before the oeq compare. The VLX checks
; expect a {1to4} broadcast operand on xmm vcmpeqps and a kmovq result; the
; NoVLX checks expect a {1to16} broadcast on a zmm compare followed by
; kshiftlw/kshiftrw $12 keeping only mask bits 0-3.
19968define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19969; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19970; VLX:       # %bb.0: # %entry
19971; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
19972; VLX-NEXT:    kmovq %k0, %rax
19973; VLX-NEXT:    retq
19974;
19975; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19976; NoVLX:       # %bb.0: # %entry
19977; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19978; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
19979; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19980; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19981; NoVLX-NEXT:    kmovw %k0, %eax
19982; NoVLX-NEXT:    vzeroupper
19983; NoVLX-NEXT:    retq
19984entry:
19985  %0 = bitcast <2 x i64> %__a to <4 x float>
19986  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
19987  %vec = insertelement <4 x float> undef, float %load, i32 0
19988  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19989  %2 = fcmp oeq <4 x float> %0, %1
  ; Indices 0-3 take lanes of %2; indices 4-7 select lanes of the zeroinitializer operand.
19990  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19991  %4 = bitcast <64 x i1> %3 to i64
19992  ret i64 %4
19993}
19994
; Masked ordered-equal compare of two <4 x float> vectors (bitcast from the
; <2 x i64> arguments), ANDed with the i4 mask %__u and returned as an i64 whose
; upper 60 bits are zero. The VLX checks expect one k1-masked xmm vcmpeqps read
; back with kmovq; the NoVLX checks expect a k1-masked zmm compare, then
; kshiftlw/kshiftrw $12 keeping only mask bits 0-3, then kmovw into %eax.
19995define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19996; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
19997; VLX:       # %bb.0: # %entry
19998; VLX-NEXT:    kmovd %edi, %k1
19999; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20000; VLX-NEXT:    kmovq %k0, %rax
20001; VLX-NEXT:    retq
20002;
20003; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20004; NoVLX:       # %bb.0: # %entry
20005; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20006; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20007; NoVLX-NEXT:    kmovw %edi, %k1
20008; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20009; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20010; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20011; NoVLX-NEXT:    kmovw %k0, %eax
20012; NoVLX-NEXT:    vzeroupper
20013; NoVLX-NEXT:    retq
20014entry:
20015  %0 = bitcast <2 x i64> %__a to <4 x float>
20016  %1 = bitcast <2 x i64> %__b to <4 x float>
20017  %2 = fcmp oeq <4 x float> %0, %1
20018  %3 = bitcast i4 %__u to <4 x i1>
20019  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 take lanes of %4; indices 4-7 select lanes of the zeroinitializer operand.
20020  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20021  %6 = bitcast <64 x i1> %5 to i64
20022  ret i64 %6
20023}
20024
; Memory-operand form of the masked v4 -> i64 compare: the right-hand side is
; loaded from %__b as <2 x i64> and bitcast to <4 x float>. The VLX checks expect
; the load folded into the k1-masked vcmpeqps as (%rsi) and a kmovq result; the
; NoVLX checks expect a separate vmovaps into xmm1, a k1-masked zmm compare,
; and kshiftlw/kshiftrw $12 keeping only mask bits 0-3.
20025define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
20026; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20027; VLX:       # %bb.0: # %entry
20028; VLX-NEXT:    kmovd %edi, %k1
20029; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20030; VLX-NEXT:    kmovq %k0, %rax
20031; VLX-NEXT:    retq
20032;
20033; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20034; NoVLX:       # %bb.0: # %entry
20035; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20036; NoVLX-NEXT:    kmovw %edi, %k1
20037; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
20038; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20039; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20040; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20041; NoVLX-NEXT:    kmovw %k0, %eax
20042; NoVLX-NEXT:    vzeroupper
20043; NoVLX-NEXT:    retq
20044entry:
20045  %0 = bitcast <2 x i64> %__a to <4 x float>
20046  %load = load <2 x i64>, ptr %__b
20047  %1 = bitcast <2 x i64> %load to <4 x float>
20048  %2 = fcmp oeq <4 x float> %0, %1
20049  %3 = bitcast i4 %__u to <4 x i1>
20050  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 take lanes of %4; indices 4-7 select lanes of the zeroinitializer operand.
20051  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20052  %6 = bitcast <64 x i1> %5 to i64
20053  ret i64 %6
20054}
20055
; Broadcast form of the masked v4 -> i64 compare: one float is loaded from %__b
; and splatted to all four lanes before the oeq compare, which is then ANDed
; with the i4 mask %__u. The VLX checks expect a {1to4} broadcast operand on the
; k1-masked xmm vcmpeqps; the NoVLX checks expect a {1to16} broadcast on a zmm
; compare followed by kshiftlw/kshiftrw $12 keeping only mask bits 0-3.
20056define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
20057; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20058; VLX:       # %bb.0: # %entry
20059; VLX-NEXT:    kmovd %edi, %k1
20060; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20061; VLX-NEXT:    kmovq %k0, %rax
20062; VLX-NEXT:    retq
20063;
20064; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20065; NoVLX:       # %bb.0: # %entry
20066; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20067; NoVLX-NEXT:    kmovw %edi, %k1
20068; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20069; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20070; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20071; NoVLX-NEXT:    kmovw %k0, %eax
20072; NoVLX-NEXT:    vzeroupper
20073; NoVLX-NEXT:    retq
20074entry:
20075  %0 = bitcast <2 x i64> %__a to <4 x float>
20076  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
20077  %vec = insertelement <4 x float> undef, float %load, i32 0
20078  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20079  %2 = fcmp oeq <4 x float> %0, %1
20080  %3 = bitcast i4 %__u to <4 x i1>
20081  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 take lanes of %4; indices 4-7 select lanes of the zeroinitializer operand.
20082  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20083  %6 = bitcast <64 x i1> %5 to i64
20084  ret i64 %6
20085}
20086
20087
20088
; Unmasked ordered-equal compare of two <8 x float> vectors (bitcast from the
; <4 x i64> arguments), widened to 16 lanes and returned as an i16 whose upper
; 8 bits are zero. The VLX checks expect a ymm vcmpeqps into k0; the NoVLX
; checks expect a zmm compare followed by kshiftlw/kshiftrw $8, which keeps only
; mask bits 0-7.
20089define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20090; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20091; VLX:       # %bb.0: # %entry
20092; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20093; VLX-NEXT:    kmovd %k0, %eax
20094; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20095; VLX-NEXT:    vzeroupper
20096; VLX-NEXT:    retq
20097;
20098; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20099; NoVLX:       # %bb.0: # %entry
20100; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20101; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20102; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20103; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20104; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20105; NoVLX-NEXT:    kmovw %k0, %eax
20106; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20107; NoVLX-NEXT:    vzeroupper
20108; NoVLX-NEXT:    retq
20109entry:
20110  %0 = bitcast <4 x i64> %__a to <8 x float>
20111  %1 = bitcast <4 x i64> %__b to <8 x float>
20112  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20113  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20114  %4 = bitcast <16 x i1> %3 to i16
20115  ret i16 %4
20116}
20117
; Memory-operand form of the unmasked v8 -> i16 compare: the right-hand side is
; loaded from %__b as <4 x i64> and bitcast to <8 x float>. The VLX checks expect
; the load folded into ymm vcmpeqps as (%rdi); the NoVLX checks expect a
; separate vmovaps into ymm1, a zmm compare, and kshiftlw/kshiftrw $8 keeping
; only mask bits 0-7.
20118define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20119; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20120; VLX:       # %bb.0: # %entry
20121; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20122; VLX-NEXT:    kmovd %k0, %eax
20123; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20124; VLX-NEXT:    vzeroupper
20125; VLX-NEXT:    retq
20126;
20127; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20128; NoVLX:       # %bb.0: # %entry
20129; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20130; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20131; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20132; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20133; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20134; NoVLX-NEXT:    kmovw %k0, %eax
20135; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20136; NoVLX-NEXT:    vzeroupper
20137; NoVLX-NEXT:    retq
20138entry:
20139  %0 = bitcast <4 x i64> %__a to <8 x float>
20140  %load = load <4 x i64>, ptr %__b
20141  %1 = bitcast <4 x i64> %load to <8 x float>
20142  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20143  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20144  %4 = bitcast <16 x i1> %3 to i16
20145  ret i16 %4
20146}
20147
; Broadcast form of the unmasked v8 -> i16 compare: one float is loaded from
; %__b and splatted to all eight lanes before the oeq compare. The VLX checks
; expect a {1to8} broadcast operand on ymm vcmpeqps; the NoVLX checks expect a
; {1to16} broadcast on a zmm compare followed by kshiftlw/kshiftrw $8 keeping
; only mask bits 0-7.
20148define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20149; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20150; VLX:       # %bb.0: # %entry
20151; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20152; VLX-NEXT:    kmovd %k0, %eax
20153; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20154; VLX-NEXT:    vzeroupper
20155; VLX-NEXT:    retq
20156;
20157; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20158; NoVLX:       # %bb.0: # %entry
20159; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20160; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20161; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20162; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20163; NoVLX-NEXT:    kmovw %k0, %eax
20164; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20165; NoVLX-NEXT:    vzeroupper
20166; NoVLX-NEXT:    retq
20167entry:
20168  %0 = bitcast <4 x i64> %__a to <8 x float>
20169  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
20170  %vec = insertelement <8 x float> undef, float %load, i32 0
20171  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20172  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20173  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20174  %4 = bitcast <16 x i1> %3 to i16
20175  ret i16 %4
20176}
20177
; Masked ordered-equal compare of two <8 x float> vectors (bitcast from the
; <4 x i64> arguments), ANDed with the i8 mask %__u and returned as an i16 whose
; upper 8 bits are zero. The VLX checks expect one k1-masked ymm vcmpeqps; the
; NoVLX checks expect a k1-masked zmm compare followed by kshiftlw/kshiftrw $8,
; which keeps only mask bits 0-7.
20178define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20179; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20180; VLX:       # %bb.0: # %entry
20181; VLX-NEXT:    kmovd %edi, %k1
20182; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20183; VLX-NEXT:    kmovd %k0, %eax
20184; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20185; VLX-NEXT:    vzeroupper
20186; VLX-NEXT:    retq
20187;
20188; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20189; NoVLX:       # %bb.0: # %entry
20190; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20191; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20192; NoVLX-NEXT:    kmovw %edi, %k1
20193; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20194; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20195; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20196; NoVLX-NEXT:    kmovw %k0, %eax
20197; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20198; NoVLX-NEXT:    vzeroupper
20199; NoVLX-NEXT:    retq
20200entry:
20201  %0 = bitcast <4 x i64> %__a to <8 x float>
20202  %1 = bitcast <4 x i64> %__b to <8 x float>
20203  %2 = fcmp oeq <8 x float> %0, %1
20204  %3 = bitcast i8 %__u to <8 x i1>
20205  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20206  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20207  %6 = bitcast <16 x i1> %5 to i16
20208  ret i16 %6
20209}
20210
; Memory-operand form of the masked v8 -> i16 compare: the right-hand side is
; loaded from %__b as <4 x i64> and bitcast to <8 x float>. The VLX checks expect
; the load folded into the k1-masked ymm vcmpeqps as (%rsi); the NoVLX checks
; expect a separate vmovaps into ymm1, a k1-masked zmm compare, and
; kshiftlw/kshiftrw $8 keeping only mask bits 0-7.
20211define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20212; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20213; VLX:       # %bb.0: # %entry
20214; VLX-NEXT:    kmovd %edi, %k1
20215; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20216; VLX-NEXT:    kmovd %k0, %eax
20217; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20218; VLX-NEXT:    vzeroupper
20219; VLX-NEXT:    retq
20220;
20221; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20222; NoVLX:       # %bb.0: # %entry
20223; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20224; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
20225; NoVLX-NEXT:    kmovw %edi, %k1
20226; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20227; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20228; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20229; NoVLX-NEXT:    kmovw %k0, %eax
20230; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20231; NoVLX-NEXT:    vzeroupper
20232; NoVLX-NEXT:    retq
20233entry:
20234  %0 = bitcast <4 x i64> %__a to <8 x float>
20235  %load = load <4 x i64>, ptr %__b
20236  %1 = bitcast <4 x i64> %load to <8 x float>
20237  %2 = fcmp oeq <8 x float> %0, %1
20238  %3 = bitcast i8 %__u to <8 x i1>
20239  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20240  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20241  %6 = bitcast <16 x i1> %5 to i16
20242  ret i16 %6
20243}
20244
; Broadcast form of the masked v8 -> i16 compare: one float is loaded from %__b
; and splatted to all eight lanes before the oeq compare, which is then ANDed
; with the i8 mask %__u. The VLX checks expect a {1to8} broadcast operand on the
; k1-masked ymm vcmpeqps; the NoVLX checks expect a {1to16} broadcast on a zmm
; compare followed by kshiftlw/kshiftrw $8 keeping only mask bits 0-7.
20245define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20246; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20247; VLX:       # %bb.0: # %entry
20248; VLX-NEXT:    kmovd %edi, %k1
20249; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20250; VLX-NEXT:    kmovd %k0, %eax
20251; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20252; VLX-NEXT:    vzeroupper
20253; VLX-NEXT:    retq
20254;
20255; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20256; NoVLX:       # %bb.0: # %entry
20257; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20258; NoVLX-NEXT:    kmovw %edi, %k1
20259; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20260; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20261; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20262; NoVLX-NEXT:    kmovw %k0, %eax
20263; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20264; NoVLX-NEXT:    vzeroupper
20265; NoVLX-NEXT:    retq
20266entry:
20267  %0 = bitcast <4 x i64> %__a to <8 x float>
20268  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
20269  %vec = insertelement <8 x float> undef, float %load, i32 0
20270  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20271  %2 = fcmp oeq <8 x float> %0, %1
20272  %3 = bitcast i8 %__u to <8 x i1>
20273  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20274  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20275  %6 = bitcast <16 x i1> %5 to i16
20276  ret i16 %6
20277}
20278
20279
20280
; Unmasked ordered-equal compare of two <8 x float> vectors (bitcast from the
; <4 x i64> arguments), widened to 32 lanes and returned as an i32 whose upper
; 24 bits are zero. The VLX checks expect a ymm vcmpeqps into k0; the NoVLX
; checks expect a zmm compare followed by kshiftlw/kshiftrw $8, which keeps only
; mask bits 0-7.
20281define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20282; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20283; VLX:       # %bb.0: # %entry
20284; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20285; VLX-NEXT:    kmovd %k0, %eax
20286; VLX-NEXT:    vzeroupper
20287; VLX-NEXT:    retq
20288;
20289; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20290; NoVLX:       # %bb.0: # %entry
20291; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20292; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20293; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20294; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20295; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20296; NoVLX-NEXT:    kmovw %k0, %eax
20297; NoVLX-NEXT:    vzeroupper
20298; NoVLX-NEXT:    retq
20299entry:
20300  %0 = bitcast <4 x i64> %__a to <8 x float>
20301  %1 = bitcast <4 x i64> %__b to <8 x float>
20302  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20303  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20304  %4 = bitcast <32 x i1> %3 to i32
20305  ret i32 %4
20306}
20307
; Memory-operand form of the unmasked v8 -> i32 compare: the right-hand side is
; loaded from %__b as <4 x i64> and bitcast to <8 x float>. The VLX checks expect
; the load folded into ymm vcmpeqps as (%rdi); the NoVLX checks expect a
; separate vmovaps into ymm1, a zmm compare, and kshiftlw/kshiftrw $8 keeping
; only mask bits 0-7.
20308define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20309; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20310; VLX:       # %bb.0: # %entry
20311; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20312; VLX-NEXT:    kmovd %k0, %eax
20313; VLX-NEXT:    vzeroupper
20314; VLX-NEXT:    retq
20315;
20316; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20317; NoVLX:       # %bb.0: # %entry
20318; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20319; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20320; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20321; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20322; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20323; NoVLX-NEXT:    kmovw %k0, %eax
20324; NoVLX-NEXT:    vzeroupper
20325; NoVLX-NEXT:    retq
20326entry:
20327  %0 = bitcast <4 x i64> %__a to <8 x float>
20328  %load = load <4 x i64>, ptr %__b
20329  %1 = bitcast <4 x i64> %load to <8 x float>
20330  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20331  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20332  %4 = bitcast <32 x i1> %3 to i32
20333  ret i32 %4
20334}
20335
; Broadcast form of the unmasked v8 -> i32 compare: one float is loaded from
; %__b and splatted to all eight lanes before the oeq compare. The VLX checks
; expect a {1to8} broadcast operand on ymm vcmpeqps; the NoVLX checks expect a
; {1to16} broadcast on a zmm compare followed by kshiftlw/kshiftrw $8 keeping
; only mask bits 0-7.
20336define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20337; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20338; VLX:       # %bb.0: # %entry
20339; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20340; VLX-NEXT:    kmovd %k0, %eax
20341; VLX-NEXT:    vzeroupper
20342; VLX-NEXT:    retq
20343;
20344; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20345; NoVLX:       # %bb.0: # %entry
20346; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20347; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20348; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20349; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20350; NoVLX-NEXT:    kmovw %k0, %eax
20351; NoVLX-NEXT:    vzeroupper
20352; NoVLX-NEXT:    retq
20353entry:
20354  %0 = bitcast <4 x i64> %__a to <8 x float>
20355  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
20356  %vec = insertelement <8 x float> undef, float %load, i32 0
20357  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20358  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20359  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20360  %4 = bitcast <32 x i1> %3 to i32
20361  ret i32 %4
20362}
20363
; Masked ordered-equal compare of two <8 x float> vectors (bitcast from the
; <4 x i64> arguments), ANDed with the i8 mask %__u and returned as an i32 whose
; upper 24 bits are zero. The VLX checks expect one k1-masked ymm vcmpeqps; the
; NoVLX checks expect a k1-masked zmm compare followed by kshiftlw/kshiftrw $8,
; which keeps only mask bits 0-7.
20364define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20365; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20366; VLX:       # %bb.0: # %entry
20367; VLX-NEXT:    kmovd %edi, %k1
20368; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20369; VLX-NEXT:    kmovd %k0, %eax
20370; VLX-NEXT:    vzeroupper
20371; VLX-NEXT:    retq
20372;
20373; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20374; NoVLX:       # %bb.0: # %entry
20375; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20376; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20377; NoVLX-NEXT:    kmovw %edi, %k1
20378; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20379; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20380; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20381; NoVLX-NEXT:    kmovw %k0, %eax
20382; NoVLX-NEXT:    vzeroupper
20383; NoVLX-NEXT:    retq
20384entry:
20385  %0 = bitcast <4 x i64> %__a to <8 x float>
20386  %1 = bitcast <4 x i64> %__b to <8 x float>
20387  %2 = fcmp oeq <8 x float> %0, %1
20388  %3 = bitcast i8 %__u to <8 x i1>
20389  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20390  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20391  %6 = bitcast <32 x i1> %5 to i32
20392  ret i32 %6
20393}
20394
; Memory-operand form of the masked v8 -> i32 compare: the right-hand side is
; loaded from %__b as <4 x i64> and bitcast to <8 x float>. The VLX checks expect
; the load folded into the k1-masked ymm vcmpeqps as (%rsi); the NoVLX checks
; expect a separate vmovaps into ymm1, a k1-masked zmm compare, and
; kshiftlw/kshiftrw $8 keeping only mask bits 0-7.
20395define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20396; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20397; VLX:       # %bb.0: # %entry
20398; VLX-NEXT:    kmovd %edi, %k1
20399; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20400; VLX-NEXT:    kmovd %k0, %eax
20401; VLX-NEXT:    vzeroupper
20402; VLX-NEXT:    retq
20403;
20404; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20405; NoVLX:       # %bb.0: # %entry
20406; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20407; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
20408; NoVLX-NEXT:    kmovw %edi, %k1
20409; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20410; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20411; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20412; NoVLX-NEXT:    kmovw %k0, %eax
20413; NoVLX-NEXT:    vzeroupper
20414; NoVLX-NEXT:    retq
20415entry:
20416  %0 = bitcast <4 x i64> %__a to <8 x float>
20417  %load = load <4 x i64>, ptr %__b
20418  %1 = bitcast <4 x i64> %load to <8 x float>
20419  %2 = fcmp oeq <8 x float> %0, %1
20420  %3 = bitcast i8 %__u to <8 x i1>
20421  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20422  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20423  %6 = bitcast <32 x i1> %5 to i32
20424  ret i32 %6
20425}
20426
; Broadcast form of the masked v8 -> i32 compare: one float is loaded from %__b
; and splatted to all eight lanes before the oeq compare, which is then ANDed
; with the i8 mask %__u. The VLX checks expect a {1to8} broadcast operand on the
; k1-masked ymm vcmpeqps; the NoVLX checks expect a {1to16} broadcast on a zmm
; compare followed by kshiftlw/kshiftrw $8 keeping only mask bits 0-7.
20427define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20428; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20429; VLX:       # %bb.0: # %entry
20430; VLX-NEXT:    kmovd %edi, %k1
20431; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20432; VLX-NEXT:    kmovd %k0, %eax
20433; VLX-NEXT:    vzeroupper
20434; VLX-NEXT:    retq
20435;
20436; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20437; NoVLX:       # %bb.0: # %entry
20438; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20439; NoVLX-NEXT:    kmovw %edi, %k1
20440; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20441; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20442; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20443; NoVLX-NEXT:    kmovw %k0, %eax
20444; NoVLX-NEXT:    vzeroupper
20445; NoVLX-NEXT:    retq
20446entry:
20447  %0 = bitcast <4 x i64> %__a to <8 x float>
20448  %load = load float, ptr %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 into every lane.
20449  %vec = insertelement <8 x float> undef, float %load, i32 0
20450  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20451  %2 = fcmp oeq <8 x float> %0, %1
20452  %3 = bitcast i8 %__u to <8 x i1>
20453  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take lanes of %4; indices 8-15 select lanes of the zeroinitializer operand.
20454  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20455  %6 = bitcast <32 x i1> %5 to i32
20456  ret i32 %6
20457}
20458
20459
20460
; Unmasked ordered-equal compare of two <8 x float> vectors (bitcast from the
; <4 x i64> arguments), widened to 64 lanes and returned as an i64 whose upper
; 56 bits are zero. The VLX checks expect a ymm vcmpeqps into k0 read back with
; kmovq; the NoVLX checks expect a zmm compare, kshiftlw/kshiftrw $8 keeping
; only mask bits 0-7, and a kmovw into %eax.
20461define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20462; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20463; VLX:       # %bb.0: # %entry
20464; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20465; VLX-NEXT:    kmovq %k0, %rax
20466; VLX-NEXT:    vzeroupper
20467; VLX-NEXT:    retq
20468;
20469; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20470; NoVLX:       # %bb.0: # %entry
20471; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20472; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20473; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20474; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20475; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20476; NoVLX-NEXT:    kmovw %k0, %eax
20477; NoVLX-NEXT:    vzeroupper
20478; NoVLX-NEXT:    retq
20479entry:
20480  %0 = bitcast <4 x i64> %__a to <8 x float>
20481  %1 = bitcast <4 x i64> %__b to <8 x float>
20482  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 take lanes of %2; indices 8-15 select lanes of the zeroinitializer operand.
20483  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20484  %4 = bitcast <64 x i1> %3 to i64
20485  ret i64 %4
20486}
20487
; Unmasked register/memory case: the right-hand operand is loaded from %__b as
; <4 x i64> and bitcast to <8 x float> before the fcmp oeq. The <8 x i1>
; result is widened to <64 x i1> with zero lanes (shuffle indices 8-15 pick
; from zeroinitializer) and returned as an i64.
; The VLX run expects the load folded into vcmpeqps; the NoVLX run expects a
; separate vmovaps into ymm1, a zmm compare, and a kshiftlw/kshiftrw $8 pair.
20488define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20489; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20490; VLX:       # %bb.0: # %entry
20491; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20492; VLX-NEXT:    kmovq %k0, %rax
20493; VLX-NEXT:    vzeroupper
20494; VLX-NEXT:    retq
20495;
20496; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20497; NoVLX:       # %bb.0: # %entry
20498; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20499; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20500; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20501; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20502; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20503; NoVLX-NEXT:    kmovw %k0, %eax
20504; NoVLX-NEXT:    vzeroupper
20505; NoVLX-NEXT:    retq
20506entry:
20507  %0 = bitcast <4 x i64> %__a to <8 x float>
20508  %load = load <4 x i64>, ptr %__b
20509  %1 = bitcast <4 x i64> %load to <8 x float>
20510  %2 = fcmp oeq <8 x float> %0, %1
20511  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20512  %4 = bitcast <64 x i1> %3 to i64
20513  ret i64 %4
20514}
20515
; Unmasked broadcast case: a single float is loaded from %__b and splatted to
; all 8 lanes (insertelement into lane 0, then a shufflevector with an all-zero
; index mask) before the fcmp oeq. The <8 x i1> result is widened to <64 x i1>
; with zero lanes and returned as an i64.
; Both runs expect an embedded-broadcast memory operand: {1to8} on ymm for the
; VLX run, {1to16} on zmm (plus a kshiftlw/kshiftrw $8 pair) for the NoVLX run.
20516define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20517; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20518; VLX:       # %bb.0: # %entry
20519; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20520; VLX-NEXT:    kmovq %k0, %rax
20521; VLX-NEXT:    vzeroupper
20522; VLX-NEXT:    retq
20523;
20524; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20525; NoVLX:       # %bb.0: # %entry
20526; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20527; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20528; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20529; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20530; NoVLX-NEXT:    kmovw %k0, %eax
20531; NoVLX-NEXT:    vzeroupper
20532; NoVLX-NEXT:    retq
20533entry:
20534  %0 = bitcast <4 x i64> %__a to <8 x float>
20535  %load = load float, ptr %__b
20536  %vec = insertelement <8 x float> undef, float %load, i32 0
20537  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20538  %2 = fcmp oeq <8 x float> %0, %1
20539  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20540  %4 = bitcast <64 x i1> %3 to i64
20541  ret i64 %4
20542}
20543
; Masked register/register case: the fcmp oeq result on <8 x float> is ANDed
; with %__u (an i8 bitcast to <8 x i1>), widened to <64 x i1> with zero lanes
; (shuffle indices 8-15 pick from zeroinitializer) and returned as an i64.
; Both runs expect %__u to be moved from edi into k1 and used as the write-mask
; of the compare; the NoVLX run additionally expects the zmm-widened compare
; and a kshiftlw/kshiftrw $8 pair.
20544define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20545; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20546; VLX:       # %bb.0: # %entry
20547; VLX-NEXT:    kmovd %edi, %k1
20548; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20549; VLX-NEXT:    kmovq %k0, %rax
20550; VLX-NEXT:    vzeroupper
20551; VLX-NEXT:    retq
20552;
20553; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20554; NoVLX:       # %bb.0: # %entry
20555; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20556; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20557; NoVLX-NEXT:    kmovw %edi, %k1
20558; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20559; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20560; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20561; NoVLX-NEXT:    kmovw %k0, %eax
20562; NoVLX-NEXT:    vzeroupper
20563; NoVLX-NEXT:    retq
20564entry:
20565  %0 = bitcast <4 x i64> %__a to <8 x float>
20566  %1 = bitcast <4 x i64> %__b to <8 x float>
20567  %2 = fcmp oeq <8 x float> %0, %1
20568  %3 = bitcast i8 %__u to <8 x i1>
20569  %4 = and <8 x i1> %2, %3
20570  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20571  %6 = bitcast <64 x i1> %5 to i64
20572  ret i64 %6
20573}
20574
; Masked register/memory case: the right-hand operand is loaded from %__b as
; <4 x i64>, bitcast to <8 x float> and compared with fcmp oeq; the result is
; ANDed with %__u (i8 bitcast to <8 x i1>), widened to <64 x i1> with zero
; lanes and returned as an i64.
; The expected code takes the mask from edi and the pointer from rsi. The VLX
; run expects the load folded into the write-masked compare; the NoVLX run
; expects a separate vmovaps, a zmm compare and a kshiftlw/kshiftrw $8 pair.
20575define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20576; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20577; VLX:       # %bb.0: # %entry
20578; VLX-NEXT:    kmovd %edi, %k1
20579; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20580; VLX-NEXT:    kmovq %k0, %rax
20581; VLX-NEXT:    vzeroupper
20582; VLX-NEXT:    retq
20583;
20584; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20585; NoVLX:       # %bb.0: # %entry
20586; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20587; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
20588; NoVLX-NEXT:    kmovw %edi, %k1
20589; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20590; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20591; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20592; NoVLX-NEXT:    kmovw %k0, %eax
20593; NoVLX-NEXT:    vzeroupper
20594; NoVLX-NEXT:    retq
20595entry:
20596  %0 = bitcast <4 x i64> %__a to <8 x float>
20597  %load = load <4 x i64>, ptr %__b
20598  %1 = bitcast <4 x i64> %load to <8 x float>
20599  %2 = fcmp oeq <8 x float> %0, %1
20600  %3 = bitcast i8 %__u to <8 x i1>
20601  %4 = and <8 x i1> %2, %3
20602  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20603  %6 = bitcast <64 x i1> %5 to i64
20604  ret i64 %6
20605}
20606
; Masked broadcast case: a single float loaded from %__b is splatted to all 8
; lanes (insertelement into lane 0 plus an all-zero shuffle mask), compared
; with fcmp oeq, ANDed with %__u (i8 bitcast to <8 x i1>), widened to
; <64 x i1> with zero lanes and returned as an i64.
; Both runs expect the mask in k1 (from edi) and an embedded-broadcast memory
; operand on rsi: {1to8} on ymm for the VLX run, {1to16} on zmm plus a
; kshiftlw/kshiftrw $8 pair for the NoVLX run.
20607define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20608; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20609; VLX:       # %bb.0: # %entry
20610; VLX-NEXT:    kmovd %edi, %k1
20611; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20612; VLX-NEXT:    kmovq %k0, %rax
20613; VLX-NEXT:    vzeroupper
20614; VLX-NEXT:    retq
20615;
20616; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20617; NoVLX:       # %bb.0: # %entry
20618; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20619; NoVLX-NEXT:    kmovw %edi, %k1
20620; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20621; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20622; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20623; NoVLX-NEXT:    kmovw %k0, %eax
20624; NoVLX-NEXT:    vzeroupper
20625; NoVLX-NEXT:    retq
20626entry:
20627  %0 = bitcast <4 x i64> %__a to <8 x float>
20628  %load = load float, ptr %__b
20629  %vec = insertelement <8 x float> undef, float %load, i32 0
20630  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20631  %2 = fcmp oeq <8 x float> %0, %1
20632  %3 = bitcast i8 %__u to <8 x i1>
20633  %4 = and <8 x i1> %2, %3
20634  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20635  %6 = bitcast <64 x i1> %5 to i64
20636  ret i64 %6
20637}
20638
20639
20640
; Unmasked register/register case: fcmp oeq on two <16 x float> values (bitcast
; from the <8 x i64> arguments); the <16 x i1> result is widened to <32 x i1>
; and returned as an i32.
; Shuffle indices 0-15 select the compare lanes and 16-31 select lanes of the
; zeroinitializer operand, so the upper 16 result bits are 0.
; Both runs expect a single zmm vcmpeqps; they differ only in the mask move
; (kmovd for the VLX run, kmovw for the NoVLX run).
20641define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20642; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20643; VLX:       # %bb.0: # %entry
20644; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20645; VLX-NEXT:    kmovd %k0, %eax
20646; VLX-NEXT:    vzeroupper
20647; VLX-NEXT:    retq
20648;
20649; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20650; NoVLX:       # %bb.0: # %entry
20651; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20652; NoVLX-NEXT:    kmovw %k0, %eax
20653; NoVLX-NEXT:    vzeroupper
20654; NoVLX-NEXT:    retq
20655entry:
20656  %0 = bitcast <8 x i64> %__a to <16 x float>
20657  %1 = bitcast <8 x i64> %__b to <16 x float>
20658  %2 = fcmp oeq <16 x float> %0, %1
20659  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20660  %4 = bitcast <32 x i1> %3 to i32
20661  ret i32 %4
20662}
20663
; Unmasked register/memory case: the right-hand operand is loaded from %__b as
; <8 x i64> and bitcast to <16 x float> before the fcmp oeq. The <16 x i1>
; result is widened to <32 x i1> with zero lanes (shuffle indices 16-31 pick
; from zeroinitializer) and returned as an i32.
; Both runs expect the load folded into the zmm vcmpeqps as a (%rdi) operand.
20664define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20665; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20666; VLX:       # %bb.0: # %entry
20667; VLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
20668; VLX-NEXT:    kmovd %k0, %eax
20669; VLX-NEXT:    vzeroupper
20670; VLX-NEXT:    retq
20671;
20672; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20673; NoVLX:       # %bb.0: # %entry
20674; NoVLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
20675; NoVLX-NEXT:    kmovw %k0, %eax
20676; NoVLX-NEXT:    vzeroupper
20677; NoVLX-NEXT:    retq
20678entry:
20679  %0 = bitcast <8 x i64> %__a to <16 x float>
20680  %load = load <8 x i64>, ptr %__b
20681  %1 = bitcast <8 x i64> %load to <16 x float>
20682  %2 = fcmp oeq <16 x float> %0, %1
20683  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20684  %4 = bitcast <32 x i1> %3 to i32
20685  ret i32 %4
20686}
20687
; Unmasked broadcast case: a single float loaded from %__b is splatted to all
; 16 lanes (insertelement into lane 0 plus an all-zero shuffle mask) before the
; fcmp oeq. The <16 x i1> result is widened to <32 x i1> with zero lanes and
; returned as an i32.
; Both runs expect a zmm vcmpeqps with a (%rdi){1to16} broadcast operand.
20688define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20689; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20690; VLX:       # %bb.0: # %entry
20691; VLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20692; VLX-NEXT:    kmovd %k0, %eax
20693; VLX-NEXT:    vzeroupper
20694; VLX-NEXT:    retq
20695;
20696; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20697; NoVLX:       # %bb.0: # %entry
20698; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20699; NoVLX-NEXT:    kmovw %k0, %eax
20700; NoVLX-NEXT:    vzeroupper
20701; NoVLX-NEXT:    retq
20702entry:
20703  %0 = bitcast <8 x i64> %__a to <16 x float>
20704  %load = load float, ptr %__b
20705  %vec = insertelement <16 x float> undef, float %load, i32 0
20706  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20707  %2 = fcmp oeq <16 x float> %0, %1
20708  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20709  %4 = bitcast <32 x i1> %3 to i32
20710  ret i32 %4
20711}
20712
; Masked register/register case: the fcmp oeq result on <16 x float> is ANDed
; with %__u (an i16 bitcast to <16 x i1>), widened to <32 x i1> with zero lanes
; (shuffle indices 16-31 pick from zeroinitializer) and returned as an i32.
; The two runs expect different shapes: the VLX run moves %__u into k1 and uses
; it as the compare's write-mask, while the NoVLX run does an unmasked compare
; and applies the mask afterwards with "andl %edi, %eax".
20713define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20714; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20715; VLX:       # %bb.0: # %entry
20716; VLX-NEXT:    kmovd %edi, %k1
20717; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20718; VLX-NEXT:    kmovd %k0, %eax
20719; VLX-NEXT:    vzeroupper
20720; VLX-NEXT:    retq
20721;
20722; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20723; NoVLX:       # %bb.0: # %entry
20724; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20725; NoVLX-NEXT:    kmovw %k0, %eax
20726; NoVLX-NEXT:    andl %edi, %eax
20727; NoVLX-NEXT:    vzeroupper
20728; NoVLX-NEXT:    retq
20729entry:
20730  %0 = bitcast <8 x i64> %__a to <16 x float>
20731  %1 = bitcast <8 x i64> %__b to <16 x float>
20732  %2 = fcmp oeq <16 x float> %0, %1
20733  %3 = bitcast i16 %__u to <16 x i1>
20734  %4 = and <16 x i1> %2, %3
20735  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20736  %6 = bitcast <32 x i1> %5 to i32
20737  ret i32 %6
20738}
20739
; Masked register/memory case: the right-hand operand is loaded from %__b as
; <8 x i64>, bitcast to <16 x float> and compared with fcmp oeq; the result is
; ANDed with %__u (i16 bitcast to <16 x i1>), widened to <32 x i1> with zero
; lanes and returned as an i32.
; Both runs expect the load folded into the compare as a (%rsi) operand. The
; VLX run expects %__u as a k1 write-mask; the NoVLX run expects an unmasked
; compare followed by "andl %edi, %eax".
20740define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20741; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20742; VLX:       # %bb.0: # %entry
20743; VLX-NEXT:    kmovd %edi, %k1
20744; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20745; VLX-NEXT:    kmovd %k0, %eax
20746; VLX-NEXT:    vzeroupper
20747; VLX-NEXT:    retq
20748;
20749; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20750; NoVLX:       # %bb.0: # %entry
20751; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
20752; NoVLX-NEXT:    kmovw %k0, %eax
20753; NoVLX-NEXT:    andl %edi, %eax
20754; NoVLX-NEXT:    vzeroupper
20755; NoVLX-NEXT:    retq
20756entry:
20757  %0 = bitcast <8 x i64> %__a to <16 x float>
20758  %load = load <8 x i64>, ptr %__b
20759  %1 = bitcast <8 x i64> %load to <16 x float>
20760  %2 = fcmp oeq <16 x float> %0, %1
20761  %3 = bitcast i16 %__u to <16 x i1>
20762  %4 = and <16 x i1> %2, %3
20763  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20764  %6 = bitcast <32 x i1> %5 to i32
20765  ret i32 %6
20766}
20767
; Masked broadcast case: a single float loaded from %__b is splatted to all 16
; lanes (insertelement into lane 0 plus an all-zero shuffle mask), compared
; with fcmp oeq, ANDed with %__u (i16 bitcast to <16 x i1>), widened to
; <32 x i1> with zero lanes and returned as an i32.
; Both runs expect a (%rsi){1to16} broadcast operand. The VLX run expects %__u
; as a k1 write-mask; the NoVLX run expects an unmasked compare followed by
; "andl %edi, %eax".
20768define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20769; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20770; VLX:       # %bb.0: # %entry
20771; VLX-NEXT:    kmovd %edi, %k1
20772; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20773; VLX-NEXT:    kmovd %k0, %eax
20774; VLX-NEXT:    vzeroupper
20775; VLX-NEXT:    retq
20776;
20777; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20778; NoVLX:       # %bb.0: # %entry
20779; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
20780; NoVLX-NEXT:    kmovw %k0, %eax
20781; NoVLX-NEXT:    andl %edi, %eax
20782; NoVLX-NEXT:    vzeroupper
20783; NoVLX-NEXT:    retq
20784entry:
20785  %0 = bitcast <8 x i64> %__a to <16 x float>
20786  %load = load float, ptr %__b
20787  %vec = insertelement <16 x float> undef, float %load, i32 0
20788  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20789  %2 = fcmp oeq <16 x float> %0, %1
20790  %3 = bitcast i16 %__u to <16 x i1>
20791  %4 = and <16 x i1> %2, %3
20792  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20793  %6 = bitcast <32 x i1> %5 to i32
20794  ret i32 %6
20795}
20796
20797
20798
; Unmasked intrinsic case: calls @llvm.x86.avx512.mask.cmp.ps.512 on the two
; <16 x float> operands with predicate operand 2, an all-true <16 x i1> mask
; and a final i32 operand of 8. The <16 x i1> result is bitcast to i16 and
; zero-extended to the i32 return value (no shufflevector is used here).
; Both runs share the single CHECK prefix and expect "vcmpleps {sae}".
; NOTE(review): the final operand 8 presumably selects SAE, matching the {sae}
; in the expected output - confirm against the intrinsic definition.
; NOTE(review): the function name says "oeq", but the predicate operand is 2
; and the expected instruction is vcmpleps - confirm this is intended.
20799define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20800; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
20801; CHECK:       # %bb.0: # %entry
20802; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
20803; CHECK-NEXT:    kmovw %k0, %eax
20804; CHECK-NEXT:    vzeroupper
20805; CHECK-NEXT:    retq
20806entry:
20807  %0 = bitcast <8 x i64> %__a to <16 x float>
20808  %1 = bitcast <8 x i64> %__b to <16 x float>
20809  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
20810  %3 = bitcast <16 x i1> %2 to i16
20811  %4 = zext i16 %3 to i32
20812  ret i32 %4
20813}
20814
; Masked intrinsic case: calls @llvm.x86.avx512.mask.cmp.ps.512 with predicate
; operand 2, an all-true <16 x i1> mask and a final i32 operand of 8, then ANDs
; the <16 x i1> result with %__u (i16 bitcast to <16 x i1>) in IR. The result
; is bitcast to i16 and zero-extended to the i32 return value.
; Both runs expect an unmasked "vcmpleps {sae}" followed by "andl %edi, %eax";
; they differ only in the mask move (kmovd vs. kmovw).
; NOTE(review): the final operand 8 presumably selects SAE, matching the {sae}
; in the expected output - confirm against the intrinsic definition.
; NOTE(review): the function name says "oeq", but the predicate operand is 2
; and the expected instruction is vcmpleps - confirm this is intended.
20815define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20816; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20817; VLX:       # %bb.0: # %entry
20818; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
20819; VLX-NEXT:    kmovd %k0, %eax
20820; VLX-NEXT:    andl %edi, %eax
20821; VLX-NEXT:    vzeroupper
20822; VLX-NEXT:    retq
20823;
20824; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20825; NoVLX:       # %bb.0: # %entry
20826; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
20827; NoVLX-NEXT:    kmovw %k0, %eax
20828; NoVLX-NEXT:    andl %edi, %eax
20829; NoVLX-NEXT:    vzeroupper
20830; NoVLX-NEXT:    retq
20831entry:
20832  %0 = bitcast <8 x i64> %__a to <16 x float>
20833  %1 = bitcast <8 x i64> %__b to <16 x float>
20834  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
20835  %3 = bitcast i16 %__u to <16 x i1>
20836  %4 = and <16 x i1> %2, %3
20837  %5 = bitcast <16 x i1> %4 to i16
20838  %6 = zext i16 %5 to i32
20839  ret i32 %6
20840}
20841
20842
20843
; Unmasked register/register case: fcmp oeq on two <16 x float> values (bitcast
; from the <8 x i64> arguments); the <16 x i1> result is widened to <64 x i1>
; and returned as an i64.
; Shuffle indices 0-15 select the compare lanes; every other index is in the
; range 16-31, which selects lanes of the zeroinitializer operand, so bits
; 16-63 of the result are 0.
; The VLX run expects the mask moved with kmovq to rax; the NoVLX run expects
; kmovw to eax.
20844define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20845; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20846; VLX:       # %bb.0: # %entry
20847; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20848; VLX-NEXT:    kmovq %k0, %rax
20849; VLX-NEXT:    vzeroupper
20850; VLX-NEXT:    retq
20851;
20852; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20853; NoVLX:       # %bb.0: # %entry
20854; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20855; NoVLX-NEXT:    kmovw %k0, %eax
20856; NoVLX-NEXT:    vzeroupper
20857; NoVLX-NEXT:    retq
20858entry:
20859  %0 = bitcast <8 x i64> %__a to <16 x float>
20860  %1 = bitcast <8 x i64> %__b to <16 x float>
20861  %2 = fcmp oeq <16 x float> %0, %1
20862  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20863  %4 = bitcast <64 x i1> %3 to i64
20864  ret i64 %4
20865}
20866
; Unmasked register/memory case: the right-hand operand is loaded from %__b as
; <8 x i64> and bitcast to <16 x float> before the fcmp oeq. The <16 x i1>
; result is widened to <64 x i1> with zero lanes (all shuffle indices above
; lane 15 fall in 16-31, i.e. the zeroinitializer operand) and returned as i64.
; Both runs expect the load folded into the compare as a (%rdi) operand; the
; VLX run moves the mask with kmovq to rax, the NoVLX run with kmovw to eax.
20867define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20868; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20869; VLX:       # %bb.0: # %entry
20870; VLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
20871; VLX-NEXT:    kmovq %k0, %rax
20872; VLX-NEXT:    vzeroupper
20873; VLX-NEXT:    retq
20874;
20875; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20876; NoVLX:       # %bb.0: # %entry
20877; NoVLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
20878; NoVLX-NEXT:    kmovw %k0, %eax
20879; NoVLX-NEXT:    vzeroupper
20880; NoVLX-NEXT:    retq
20881entry:
20882  %0 = bitcast <8 x i64> %__a to <16 x float>
20883  %load = load <8 x i64>, ptr %__b
20884  %1 = bitcast <8 x i64> %load to <16 x float>
20885  %2 = fcmp oeq <16 x float> %0, %1
20886  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20887  %4 = bitcast <64 x i1> %3 to i64
20888  ret i64 %4
20889}
20890
; Unmasked broadcast case: a single float loaded from %__b is splatted to all
; 16 lanes (insertelement into lane 0 plus an all-zero shuffle mask) before the
; fcmp oeq. The <16 x i1> result is widened to <64 x i1> with zero lanes and
; returned as an i64.
; Both runs expect a zmm vcmpeqps with a (%rdi){1to16} broadcast operand; the
; VLX run moves the mask with kmovq to rax, the NoVLX run with kmovw to eax.
20891define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20892; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20893; VLX:       # %bb.0: # %entry
20894; VLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20895; VLX-NEXT:    kmovq %k0, %rax
20896; VLX-NEXT:    vzeroupper
20897; VLX-NEXT:    retq
20898;
20899; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20900; NoVLX:       # %bb.0: # %entry
20901; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
20902; NoVLX-NEXT:    kmovw %k0, %eax
20903; NoVLX-NEXT:    vzeroupper
20904; NoVLX-NEXT:    retq
20905entry:
20906  %0 = bitcast <8 x i64> %__a to <16 x float>
20907  %load = load float, ptr %__b
20908  %vec = insertelement <16 x float> undef, float %load, i32 0
20909  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20910  %2 = fcmp oeq <16 x float> %0, %1
20911  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20912  %4 = bitcast <64 x i1> %3 to i64
20913  ret i64 %4
20914}
20915
; Masked register/register case: the fcmp oeq result on <16 x float> is ANDed
; with %__u (an i16 bitcast to <16 x i1>), widened to <64 x i1> with zero lanes
; (all shuffle indices above lane 15 fall in 16-31, the zeroinitializer
; operand) and returned as an i64.
; The VLX run expects %__u in k1 as the compare's write-mask and kmovq to rax;
; the NoVLX run expects an unmasked compare, kmovw to eax and
; "andl %edi, %eax".
20916define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20917; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20918; VLX:       # %bb.0: # %entry
20919; VLX-NEXT:    kmovd %edi, %k1
20920; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20921; VLX-NEXT:    kmovq %k0, %rax
20922; VLX-NEXT:    vzeroupper
20923; VLX-NEXT:    retq
20924;
20925; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20926; NoVLX:       # %bb.0: # %entry
20927; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20928; NoVLX-NEXT:    kmovw %k0, %eax
20929; NoVLX-NEXT:    andl %edi, %eax
20930; NoVLX-NEXT:    vzeroupper
20931; NoVLX-NEXT:    retq
20932entry:
20933  %0 = bitcast <8 x i64> %__a to <16 x float>
20934  %1 = bitcast <8 x i64> %__b to <16 x float>
20935  %2 = fcmp oeq <16 x float> %0, %1
20936  %3 = bitcast i16 %__u to <16 x i1>
20937  %4 = and <16 x i1> %2, %3
20938  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20939  %6 = bitcast <64 x i1> %5 to i64
20940  ret i64 %6
20941}
20942
; Masked register/memory case: the right-hand operand is loaded from %__b as
; <8 x i64>, bitcast to <16 x float> and compared with fcmp oeq; the result is
; ANDed with %__u (i16 bitcast to <16 x i1>), widened to <64 x i1> with zero
; lanes and returned as an i64.
; Both runs expect the load folded into the compare as a (%rsi) operand. The
; VLX run expects %__u as a k1 write-mask and kmovq to rax; the NoVLX run
; expects an unmasked compare, kmovw to eax and "andl %edi, %eax".
20943define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20944; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20945; VLX:       # %bb.0: # %entry
20946; VLX-NEXT:    kmovd %edi, %k1
20947; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20948; VLX-NEXT:    kmovq %k0, %rax
20949; VLX-NEXT:    vzeroupper
20950; VLX-NEXT:    retq
20951;
20952; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20953; NoVLX:       # %bb.0: # %entry
20954; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
20955; NoVLX-NEXT:    kmovw %k0, %eax
20956; NoVLX-NEXT:    andl %edi, %eax
20957; NoVLX-NEXT:    vzeroupper
20958; NoVLX-NEXT:    retq
20959entry:
20960  %0 = bitcast <8 x i64> %__a to <16 x float>
20961  %load = load <8 x i64>, ptr %__b
20962  %1 = bitcast <8 x i64> %load to <16 x float>
20963  %2 = fcmp oeq <16 x float> %0, %1
20964  %3 = bitcast i16 %__u to <16 x i1>
20965  %4 = and <16 x i1> %2, %3
20966  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20967  %6 = bitcast <64 x i1> %5 to i64
20968  ret i64 %6
20969}
20970
; Masked broadcast case: a single float loaded from %__b is splatted to all 16
; lanes (insertelement into lane 0 plus an all-zero shuffle mask), compared
; with fcmp oeq, ANDed with %__u (i16 bitcast to <16 x i1>), widened to
; <64 x i1> with zero lanes and returned as an i64.
; Both runs expect a (%rsi){1to16} broadcast operand. The VLX run expects %__u
; as a k1 write-mask and kmovq to rax; the NoVLX run expects an unmasked
; compare, kmovw to eax and "andl %edi, %eax".
20971define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20972; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20973; VLX:       # %bb.0: # %entry
20974; VLX-NEXT:    kmovd %edi, %k1
20975; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20976; VLX-NEXT:    kmovq %k0, %rax
20977; VLX-NEXT:    vzeroupper
20978; VLX-NEXT:    retq
20979;
20980; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20981; NoVLX:       # %bb.0: # %entry
20982; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
20983; NoVLX-NEXT:    kmovw %k0, %eax
20984; NoVLX-NEXT:    andl %edi, %eax
20985; NoVLX-NEXT:    vzeroupper
20986; NoVLX-NEXT:    retq
20987entry:
20988  %0 = bitcast <8 x i64> %__a to <16 x float>
20989  %load = load float, ptr %__b
20990  %vec = insertelement <16 x float> undef, float %load, i32 0
20991  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20992  %2 = fcmp oeq <16 x float> %0, %1
20993  %3 = bitcast i16 %__u to <16 x i1>
20994  %4 = and <16 x i1> %2, %3
20995  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20996  %6 = bitcast <64 x i1> %5 to i64
20997  ret i64 %6
20998}
20999
21000
21001
; Unmasked intrinsic case returning i64: calls
; @llvm.x86.avx512.mask.cmp.ps.512 on the two <16 x float> operands with
; predicate operand 2, an all-true <16 x i1> mask and a final i32 operand of 8.
; The <16 x i1> result is bitcast to i16 and zero-extended to i64 (no
; shufflevector is used here).
; Both runs share the single CHECK prefix and expect "vcmpleps {sae}" followed
; by kmovw to eax.
; NOTE(review): the final operand 8 presumably selects SAE, matching the {sae}
; in the expected output - confirm against the intrinsic definition.
; NOTE(review): the function name says "oeq", but the predicate operand is 2
; and the expected instruction is vcmpleps - confirm this is intended.
21002define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21003; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
21004; CHECK:       # %bb.0: # %entry
21005; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21006; CHECK-NEXT:    kmovw %k0, %eax
21007; CHECK-NEXT:    vzeroupper
21008; CHECK-NEXT:    retq
21009entry:
21010  %0 = bitcast <8 x i64> %__a to <16 x float>
21011  %1 = bitcast <8 x i64> %__b to <16 x float>
21012  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
21013  %3 = bitcast <16 x i1> %2 to i16
21014  %4 = zext i16 %3 to i64
21015  ret i64 %4
21016}
21017
; Masked intrinsic case returning i64: calls @llvm.x86.avx512.mask.cmp.ps.512
; with predicate operand 2, an all-true <16 x i1> mask and a final i32 operand
; of 8, then ANDs the <16 x i1> result with %__u (i16 bitcast to <16 x i1>) in
; IR. The result is bitcast to i16 and zero-extended to i64.
; Both runs expect an unmasked "vcmpleps {sae}" followed by "andl %edi, %eax";
; they differ only in the mask move (kmovd vs. kmovw).
; NOTE(review): the final operand 8 presumably selects SAE, matching the {sae}
; in the expected output - confirm against the intrinsic definition.
; NOTE(review): the function name says "oeq", but the predicate operand is 2
; and the expected instruction is vcmpleps - confirm this is intended.
21018define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21019; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21020; VLX:       # %bb.0: # %entry
21021; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21022; VLX-NEXT:    kmovd %k0, %eax
21023; VLX-NEXT:    andl %edi, %eax
21024; VLX-NEXT:    vzeroupper
21025; VLX-NEXT:    retq
21026;
21027; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21028; NoVLX:       # %bb.0: # %entry
21029; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21030; NoVLX-NEXT:    kmovw %k0, %eax
21031; NoVLX-NEXT:    andl %edi, %eax
21032; NoVLX-NEXT:    vzeroupper
21033; NoVLX-NEXT:    retq
21034entry:
21035  %0 = bitcast <8 x i64> %__a to <16 x float>
21036  %1 = bitcast <8 x i64> %__b to <16 x float>
21037  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
21038  %3 = bitcast i16 %__u to <16 x i1>
21039  %4 = and <16 x i1> %2, %3
21040  %5 = bitcast <16 x i1> %4 to i16
21041  %6 = zext i16 %5 to i64
21042  ret i64 %6
21043}
21044
21045
21046
; Declaration of the 512-bit packed-double compare intrinsic: takes two
; <8 x double> vectors, an i32, an <8 x i1> vector and a trailing i32, and
; returns <8 x i1>.
; NOTE(review): by analogy with the calls to the ps.512 variant in this file,
; the operands are presumably (lhs, rhs, predicate, mask, SAE/rounding
; control) - confirm against the intrinsic definition.
21047declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
; Unmasked register/register case: fcmp oeq on two <2 x double> values (bitcast
; from the <2 x i64> arguments); the <2 x i1> result is widened to <4 x i1>
; and returned as an i4.
; Shuffle indices 0-1 select the compare lanes and 2-3 select lanes of the
; zeroinitializer operand, so the upper two result bits are 0.
; The VLX run expects an xmm compare and kmovb; the NoVLX run expects the
; compare on zmm registers, kmovw, and an "andl $3, %eax" on the result.
21048define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21049; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21050; VLX:       # %bb.0: # %entry
21051; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21052; VLX-NEXT:    kmovb %k0, %eax
21053; VLX-NEXT:    retq
21054;
21055; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21056; NoVLX:       # %bb.0: # %entry
21057; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21058; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21059; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21060; NoVLX-NEXT:    kmovw %k0, %eax
21061; NoVLX-NEXT:    andl $3, %eax
21062; NoVLX-NEXT:    vzeroupper
21063; NoVLX-NEXT:    retq
21064entry:
21065  %0 = bitcast <2 x i64> %__a to <2 x double>
21066  %1 = bitcast <2 x i64> %__b to <2 x double>
21067  %2 = fcmp oeq <2 x double> %0, %1
21068  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21069  %4 = bitcast <4 x i1> %3 to i4
21070  ret i4 %4
21071}
21072
; Unmasked register/memory case: the right-hand operand is loaded from %__b as
; <2 x i64> and bitcast to <2 x double> before the fcmp oeq. The <2 x i1>
; result is widened to <4 x i1> with zero lanes (shuffle indices 2-3 pick from
; zeroinitializer) and returned as an i4.
; The VLX run expects the load folded into an xmm vcmpeqpd; the NoVLX run
; expects a separate vmovapd into xmm1, a zmm compare, kmovw and
; "andl $3, %eax".
21073define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21074; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21075; VLX:       # %bb.0: # %entry
21076; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21077; VLX-NEXT:    kmovb %k0, %eax
21078; VLX-NEXT:    retq
21079;
21080; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21081; NoVLX:       # %bb.0: # %entry
21082; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21083; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21084; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21085; NoVLX-NEXT:    kmovw %k0, %eax
21086; NoVLX-NEXT:    andl $3, %eax
21087; NoVLX-NEXT:    vzeroupper
21088; NoVLX-NEXT:    retq
21089entry:
21090  %0 = bitcast <2 x i64> %__a to <2 x double>
21091  %load = load <2 x i64>, ptr %__b
21092  %1 = bitcast <2 x i64> %load to <2 x double>
21093  %2 = fcmp oeq <2 x double> %0, %1
21094  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21095  %4 = bitcast <4 x i1> %3 to i4
21096  ret i4 %4
21097}
21098
; Unmasked broadcast memory form. A single double is loaded from %__b and
; splatted to both lanes (insertelement into lane 0, then a <0, 0> shuffle)
; before the `fcmp oeq`; the <2 x i1> result is zero-padded to <4 x i1> and
; returned as an i4 bitmask.
; Expected output uses an embedded-broadcast memory operand in both runs:
; {1to2} on the 128-bit compare with AVX512VL, {1to8} on the %zmm compare
; without it (followed by `andl $3` to keep the two low result bits).
21099define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21100; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21101; VLX:       # %bb.0: # %entry
21102; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21103; VLX-NEXT:    kmovb %k0, %eax
21104; VLX-NEXT:    retq
21105;
21106; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21107; NoVLX:       # %bb.0: # %entry
21108; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21109; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21110; NoVLX-NEXT:    kmovw %k0, %eax
21111; NoVLX-NEXT:    andl $3, %eax
21112; NoVLX-NEXT:    vzeroupper
21113; NoVLX-NEXT:    retq
21114entry:
21115  %0 = bitcast <2 x i64> %__a to <2 x double>
21116  %load = load double, ptr %__b
21117  %vec = insertelement <2 x double> undef, double %load, i32 0
21118  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21119  %2 = fcmp oeq <2 x double> %0, %1
21120  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21121  %4 = bitcast <4 x i1> %3 to i4
21122  ret i4 %4
21123}
21124
; Masked register-register form. The `fcmp oeq` result on two <2 x double>
; vectors is ANDed with the i2 mask %__u (bitcast to <2 x i1>), zero-padded to
; <4 x i1> and returned as an i4 bitmask.
; Expected output in both runs moves the mask from %edi into %k1 and applies it
; as a {%k1} write-mask on vcmpeqpd; without AVX512VL the compare is done on
; %zmm registers and `andl $3` keeps only the two low result bits.
21125define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21126; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21127; VLX:       # %bb.0: # %entry
21128; VLX-NEXT:    kmovd %edi, %k1
21129; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21130; VLX-NEXT:    kmovb %k0, %eax
21131; VLX-NEXT:    retq
21132;
21133; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21134; NoVLX:       # %bb.0: # %entry
21135; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21136; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21137; NoVLX-NEXT:    kmovw %edi, %k1
21138; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21139; NoVLX-NEXT:    kmovw %k0, %eax
21140; NoVLX-NEXT:    andl $3, %eax
21141; NoVLX-NEXT:    vzeroupper
21142; NoVLX-NEXT:    retq
21143entry:
21144  %0 = bitcast <2 x i64> %__a to <2 x double>
21145  %1 = bitcast <2 x i64> %__b to <2 x double>
21146  %2 = fcmp oeq <2 x double> %0, %1
21147  %3 = bitcast i2 %__u to <2 x i1>
21148  %4 = and <2 x i1> %2, %3
21149  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21150  %6 = bitcast <4 x i1> %5 to i4
21151  ret i4 %6
21152}
21153
; Masked full-vector memory form. The right-hand operand is loaded from %__b
; as <2 x i64>; the `fcmp oeq` result is ANDed with the i2 mask %__u, zero-padded
; to <4 x i1> and returned as an i4 bitmask.
; Expected output applies the mask as a {%k1} write-mask. With AVX512VL the load
; is folded into vcmpeqpd as a (%rsi) operand; without it a separate vmovapd
; loads %xmm1 first and `andl $3` keeps only the two low result bits.
21154define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21155; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21156; VLX:       # %bb.0: # %entry
21157; VLX-NEXT:    kmovd %edi, %k1
21158; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21159; VLX-NEXT:    kmovb %k0, %eax
21160; VLX-NEXT:    retq
21161;
21162; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21163; NoVLX:       # %bb.0: # %entry
21164; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21165; NoVLX-NEXT:    kmovw %edi, %k1
21166; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21167; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21168; NoVLX-NEXT:    kmovw %k0, %eax
21169; NoVLX-NEXT:    andl $3, %eax
21170; NoVLX-NEXT:    vzeroupper
21171; NoVLX-NEXT:    retq
21172entry:
21173  %0 = bitcast <2 x i64> %__a to <2 x double>
21174  %load = load <2 x i64>, ptr %__b
21175  %1 = bitcast <2 x i64> %load to <2 x double>
21176  %2 = fcmp oeq <2 x double> %0, %1
21177  %3 = bitcast i2 %__u to <2 x i1>
21178  %4 = and <2 x i1> %2, %3
21179  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21180  %6 = bitcast <4 x i1> %5 to i4
21181  ret i4 %6
21182}
21183
; Masked broadcast memory form. A single double loaded from %__b is splatted to
; both lanes; the `fcmp oeq` result is ANDed with the i2 mask %__u, zero-padded
; to <4 x i1> and returned as an i4 bitmask.
; Expected output combines a {%k1} write-mask with an embedded-broadcast
; operand: (%rsi){1to2} with AVX512VL, (%rsi){1to8} on the %zmm compare without
; it (followed by `andl $3` to keep the two low result bits).
21184define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21185; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21186; VLX:       # %bb.0: # %entry
21187; VLX-NEXT:    kmovd %edi, %k1
21188; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21189; VLX-NEXT:    kmovb %k0, %eax
21190; VLX-NEXT:    retq
21191;
21192; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21193; NoVLX:       # %bb.0: # %entry
21194; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21195; NoVLX-NEXT:    kmovw %edi, %k1
21196; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21197; NoVLX-NEXT:    kmovw %k0, %eax
21198; NoVLX-NEXT:    andl $3, %eax
21199; NoVLX-NEXT:    vzeroupper
21200; NoVLX-NEXT:    retq
21201entry:
21202  %0 = bitcast <2 x i64> %__a to <2 x double>
21203  %load = load double, ptr %__b
21204  %vec = insertelement <2 x double> undef, double %load, i32 0
21205  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21206  %2 = fcmp oeq <2 x double> %0, %1
21207  %3 = bitcast i2 %__u to <2 x i1>
21208  %4 = and <2 x i1> %2, %3
21209  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21210  %6 = bitcast <4 x i1> %5 to i4
21211  ret i4 %6
21212}
21213
21214
21215
; Unmasked register-register form, widened to 8 mask bits. The <2 x i1> result
; of `fcmp oeq` on two <2 x double> vectors is padded to <8 x i1> (every shuffle
; index above 1 selects a zeroinitializer lane) and returned as an i8 bitmask.
; Expected output with AVX512VL is a 128-bit vcmpeqpd into %k0; without it the
; compare is done on %zmm registers and a kshiftlw/kshiftrw $14 pair leaves only
; the two low bits of the 16-bit mask.
21216define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21217; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21218; VLX:       # %bb.0: # %entry
21219; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21220; VLX-NEXT:    kmovd %k0, %eax
21221; VLX-NEXT:    # kill: def $al killed $al killed $eax
21222; VLX-NEXT:    retq
21223;
21224; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21225; NoVLX:       # %bb.0: # %entry
21226; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21227; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21228; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21229; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21230; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21231; NoVLX-NEXT:    kmovw %k0, %eax
21232; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21233; NoVLX-NEXT:    vzeroupper
21234; NoVLX-NEXT:    retq
21235entry:
21236  %0 = bitcast <2 x i64> %__a to <2 x double>
21237  %1 = bitcast <2 x i64> %__b to <2 x double>
21238  %2 = fcmp oeq <2 x double> %0, %1
21239  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21240  %4 = bitcast <8 x i1> %3 to i8
21241  ret i8 %4
21242}
21243
; Unmasked full-vector memory form, widened to 8 mask bits. The right-hand
; operand is loaded from %__b as <2 x i64>; the <2 x i1> `fcmp oeq` result is
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; Expected output with AVX512VL folds the load into vcmpeqpd as a (%rdi)
; operand; without it a separate vmovapd loads %xmm1 before the %zmm compare and
; a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21244define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21245; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21246; VLX:       # %bb.0: # %entry
21247; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21248; VLX-NEXT:    kmovd %k0, %eax
21249; VLX-NEXT:    # kill: def $al killed $al killed $eax
21250; VLX-NEXT:    retq
21251;
21252; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21253; NoVLX:       # %bb.0: # %entry
21254; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21255; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21256; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21257; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21258; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21259; NoVLX-NEXT:    kmovw %k0, %eax
21260; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21261; NoVLX-NEXT:    vzeroupper
21262; NoVLX-NEXT:    retq
21263entry:
21264  %0 = bitcast <2 x i64> %__a to <2 x double>
21265  %load = load <2 x i64>, ptr %__b
21266  %1 = bitcast <2 x i64> %load to <2 x double>
21267  %2 = fcmp oeq <2 x double> %0, %1
21268  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21269  %4 = bitcast <8 x i1> %3 to i8
21270  ret i8 %4
21271}
21272
; Unmasked broadcast memory form, widened to 8 mask bits. A single double loaded
; from %__b is splatted to both lanes before the `fcmp oeq`; the <2 x i1> result
; is zero-padded to <8 x i1> and returned as an i8 bitmask.
; Expected output uses an embedded-broadcast operand: (%rdi){1to2} with
; AVX512VL, (%rdi){1to8} on the %zmm compare without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21273define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21274; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21275; VLX:       # %bb.0: # %entry
21276; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21277; VLX-NEXT:    kmovd %k0, %eax
21278; VLX-NEXT:    # kill: def $al killed $al killed $eax
21279; VLX-NEXT:    retq
21280;
21281; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21282; NoVLX:       # %bb.0: # %entry
21283; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21284; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21285; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21286; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21287; NoVLX-NEXT:    kmovw %k0, %eax
21288; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21289; NoVLX-NEXT:    vzeroupper
21290; NoVLX-NEXT:    retq
21291entry:
21292  %0 = bitcast <2 x i64> %__a to <2 x double>
21293  %load = load double, ptr %__b
21294  %vec = insertelement <2 x double> undef, double %load, i32 0
21295  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21296  %2 = fcmp oeq <2 x double> %0, %1
21297  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21298  %4 = bitcast <8 x i1> %3 to i8
21299  ret i8 %4
21300}
21301
; Masked register-register form, widened to 8 mask bits. The `fcmp oeq` result
; on two <2 x double> vectors is ANDed with the i2 mask %__u (bitcast to
; <2 x i1>), zero-padded to <8 x i1> and returned as an i8 bitmask.
; Expected output in both runs moves the mask from %edi into %k1 and applies it
; as a {%k1} write-mask; without AVX512VL the compare is done on %zmm registers
; and a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21302define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21303; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21304; VLX:       # %bb.0: # %entry
21305; VLX-NEXT:    kmovd %edi, %k1
21306; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21307; VLX-NEXT:    kmovd %k0, %eax
21308; VLX-NEXT:    # kill: def $al killed $al killed $eax
21309; VLX-NEXT:    retq
21310;
21311; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21312; NoVLX:       # %bb.0: # %entry
21313; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21314; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21315; NoVLX-NEXT:    kmovw %edi, %k1
21316; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21317; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21318; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21319; NoVLX-NEXT:    kmovw %k0, %eax
21320; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21321; NoVLX-NEXT:    vzeroupper
21322; NoVLX-NEXT:    retq
21323entry:
21324  %0 = bitcast <2 x i64> %__a to <2 x double>
21325  %1 = bitcast <2 x i64> %__b to <2 x double>
21326  %2 = fcmp oeq <2 x double> %0, %1
21327  %3 = bitcast i2 %__u to <2 x i1>
21328  %4 = and <2 x i1> %2, %3
21329  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21330  %6 = bitcast <8 x i1> %5 to i8
21331  ret i8 %6
21332}
21333
; Masked full-vector memory form, widened to 8 mask bits. The right-hand operand
; is loaded from %__b as <2 x i64>; the `fcmp oeq` result is ANDed with the i2
; mask %__u, zero-padded to <8 x i1> and returned as an i8 bitmask.
; Expected output applies the mask as a {%k1} write-mask. With AVX512VL the load
; is folded into vcmpeqpd as a (%rsi) operand; without it a separate vmovapd
; loads %xmm1 and a kshiftlw/kshiftrw $14 pair leaves the two low mask bits.
21334define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21335; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21336; VLX:       # %bb.0: # %entry
21337; VLX-NEXT:    kmovd %edi, %k1
21338; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21339; VLX-NEXT:    kmovd %k0, %eax
21340; VLX-NEXT:    # kill: def $al killed $al killed $eax
21341; VLX-NEXT:    retq
21342;
21343; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21344; NoVLX:       # %bb.0: # %entry
21345; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21346; NoVLX-NEXT:    kmovw %edi, %k1
21347; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21348; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21349; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21350; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21351; NoVLX-NEXT:    kmovw %k0, %eax
21352; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21353; NoVLX-NEXT:    vzeroupper
21354; NoVLX-NEXT:    retq
21355entry:
21356  %0 = bitcast <2 x i64> %__a to <2 x double>
21357  %load = load <2 x i64>, ptr %__b
21358  %1 = bitcast <2 x i64> %load to <2 x double>
21359  %2 = fcmp oeq <2 x double> %0, %1
21360  %3 = bitcast i2 %__u to <2 x i1>
21361  %4 = and <2 x i1> %2, %3
21362  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21363  %6 = bitcast <8 x i1> %5 to i8
21364  ret i8 %6
21365}
21366
; Masked broadcast memory form, widened to 8 mask bits. A single double loaded
; from %__b is splatted to both lanes; the `fcmp oeq` result is ANDed with the
; i2 mask %__u, zero-padded to <8 x i1> and returned as an i8 bitmask.
; Expected output combines a {%k1} write-mask with an embedded-broadcast
; operand: (%rsi){1to2} with AVX512VL, (%rsi){1to8} without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21367define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21368; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21369; VLX:       # %bb.0: # %entry
21370; VLX-NEXT:    kmovd %edi, %k1
21371; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21372; VLX-NEXT:    kmovd %k0, %eax
21373; VLX-NEXT:    # kill: def $al killed $al killed $eax
21374; VLX-NEXT:    retq
21375;
21376; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21377; NoVLX:       # %bb.0: # %entry
21378; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21379; NoVLX-NEXT:    kmovw %edi, %k1
21380; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21381; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21382; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21383; NoVLX-NEXT:    kmovw %k0, %eax
21384; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21385; NoVLX-NEXT:    vzeroupper
21386; NoVLX-NEXT:    retq
21387entry:
21388  %0 = bitcast <2 x i64> %__a to <2 x double>
21389  %load = load double, ptr %__b
21390  %vec = insertelement <2 x double> undef, double %load, i32 0
21391  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21392  %2 = fcmp oeq <2 x double> %0, %1
21393  %3 = bitcast i2 %__u to <2 x i1>
21394  %4 = and <2 x i1> %2, %3
21395  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21396  %6 = bitcast <8 x i1> %5 to i8
21397  ret i8 %6
21398}
21399
21400
21401
; Unmasked register-register form, widened to 16 mask bits. The <2 x i1> result
; of `fcmp oeq` on two <2 x double> vectors is padded to <16 x i1> (every
; shuffle index above 1 selects a zeroinitializer lane) and returned as an i16
; bitmask.
; Expected output with AVX512VL is a 128-bit vcmpeqpd into %k0; without it the
; compare is done on %zmm registers and a kshiftlw/kshiftrw $14 pair leaves only
; the two low bits of the 16-bit mask.
21402define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21403; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21404; VLX:       # %bb.0: # %entry
21405; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21406; VLX-NEXT:    kmovd %k0, %eax
21407; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21408; VLX-NEXT:    retq
21409;
21410; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21411; NoVLX:       # %bb.0: # %entry
21412; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21413; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21414; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21415; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21416; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21417; NoVLX-NEXT:    kmovw %k0, %eax
21418; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21419; NoVLX-NEXT:    vzeroupper
21420; NoVLX-NEXT:    retq
21421entry:
21422  %0 = bitcast <2 x i64> %__a to <2 x double>
21423  %1 = bitcast <2 x i64> %__b to <2 x double>
21424  %2 = fcmp oeq <2 x double> %0, %1
21425  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21426  %4 = bitcast <16 x i1> %3 to i16
21427  ret i16 %4
21428}
21429
; Unmasked full-vector memory form, widened to 16 mask bits. The right-hand
; operand is loaded from %__b as <2 x i64>; the <2 x i1> `fcmp oeq` result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected output with AVX512VL folds the load into vcmpeqpd as a (%rdi)
; operand; without it a separate vmovapd loads %xmm1 before the %zmm compare and
; a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21430define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21431; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21432; VLX:       # %bb.0: # %entry
21433; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21434; VLX-NEXT:    kmovd %k0, %eax
21435; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21436; VLX-NEXT:    retq
21437;
21438; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21439; NoVLX:       # %bb.0: # %entry
21440; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21441; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21442; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21443; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21444; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21445; NoVLX-NEXT:    kmovw %k0, %eax
21446; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21447; NoVLX-NEXT:    vzeroupper
21448; NoVLX-NEXT:    retq
21449entry:
21450  %0 = bitcast <2 x i64> %__a to <2 x double>
21451  %load = load <2 x i64>, ptr %__b
21452  %1 = bitcast <2 x i64> %load to <2 x double>
21453  %2 = fcmp oeq <2 x double> %0, %1
21454  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21455  %4 = bitcast <16 x i1> %3 to i16
21456  ret i16 %4
21457}
21458
; Unmasked broadcast memory form, widened to 16 mask bits. A single double
; loaded from %__b is splatted to both lanes before the `fcmp oeq`; the
; <2 x i1> result is zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected output uses an embedded-broadcast operand: (%rdi){1to2} with
; AVX512VL, (%rdi){1to8} on the %zmm compare without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21459define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21460; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21461; VLX:       # %bb.0: # %entry
21462; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21463; VLX-NEXT:    kmovd %k0, %eax
21464; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21465; VLX-NEXT:    retq
21466;
21467; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21468; NoVLX:       # %bb.0: # %entry
21469; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21470; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21471; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21472; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21473; NoVLX-NEXT:    kmovw %k0, %eax
21474; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21475; NoVLX-NEXT:    vzeroupper
21476; NoVLX-NEXT:    retq
21477entry:
21478  %0 = bitcast <2 x i64> %__a to <2 x double>
21479  %load = load double, ptr %__b
21480  %vec = insertelement <2 x double> undef, double %load, i32 0
21481  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21482  %2 = fcmp oeq <2 x double> %0, %1
21483  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21484  %4 = bitcast <16 x i1> %3 to i16
21485  ret i16 %4
21486}
21487
; Masked register-register form, widened to 16 mask bits. The `fcmp oeq` result
; on two <2 x double> vectors is ANDed with the i2 mask %__u (bitcast to
; <2 x i1>), zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected output in both runs moves the mask from %edi into %k1 and applies it
; as a {%k1} write-mask; without AVX512VL the compare is done on %zmm registers
; and a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21488define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21489; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21490; VLX:       # %bb.0: # %entry
21491; VLX-NEXT:    kmovd %edi, %k1
21492; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21493; VLX-NEXT:    kmovd %k0, %eax
21494; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21495; VLX-NEXT:    retq
21496;
21497; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21498; NoVLX:       # %bb.0: # %entry
21499; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21500; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21501; NoVLX-NEXT:    kmovw %edi, %k1
21502; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21503; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21504; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21505; NoVLX-NEXT:    kmovw %k0, %eax
21506; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21507; NoVLX-NEXT:    vzeroupper
21508; NoVLX-NEXT:    retq
21509entry:
21510  %0 = bitcast <2 x i64> %__a to <2 x double>
21511  %1 = bitcast <2 x i64> %__b to <2 x double>
21512  %2 = fcmp oeq <2 x double> %0, %1
21513  %3 = bitcast i2 %__u to <2 x i1>
21514  %4 = and <2 x i1> %2, %3
21515  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21516  %6 = bitcast <16 x i1> %5 to i16
21517  ret i16 %6
21518}
21519
; Masked full-vector memory form, widened to 16 mask bits. The right-hand
; operand is loaded from %__b as <2 x i64>; the `fcmp oeq` result is ANDed with
; the i2 mask %__u, zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected output applies the mask as a {%k1} write-mask. With AVX512VL the load
; is folded into vcmpeqpd as a (%rsi) operand; without it a separate vmovapd
; loads %xmm1 and a kshiftlw/kshiftrw $14 pair leaves the two low mask bits.
21520define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21521; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21522; VLX:       # %bb.0: # %entry
21523; VLX-NEXT:    kmovd %edi, %k1
21524; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21525; VLX-NEXT:    kmovd %k0, %eax
21526; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21527; VLX-NEXT:    retq
21528;
21529; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21530; NoVLX:       # %bb.0: # %entry
21531; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21532; NoVLX-NEXT:    kmovw %edi, %k1
21533; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21534; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21535; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21536; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21537; NoVLX-NEXT:    kmovw %k0, %eax
21538; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21539; NoVLX-NEXT:    vzeroupper
21540; NoVLX-NEXT:    retq
21541entry:
21542  %0 = bitcast <2 x i64> %__a to <2 x double>
21543  %load = load <2 x i64>, ptr %__b
21544  %1 = bitcast <2 x i64> %load to <2 x double>
21545  %2 = fcmp oeq <2 x double> %0, %1
21546  %3 = bitcast i2 %__u to <2 x i1>
21547  %4 = and <2 x i1> %2, %3
21548  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21549  %6 = bitcast <16 x i1> %5 to i16
21550  ret i16 %6
21551}
21552
; Masked broadcast memory form, widened to 16 mask bits. A single double loaded
; from %__b is splatted to both lanes; the `fcmp oeq` result is ANDed with the
; i2 mask %__u, zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected output combines a {%k1} write-mask with an embedded-broadcast
; operand: (%rsi){1to2} with AVX512VL, (%rsi){1to8} without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21553define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21554; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21555; VLX:       # %bb.0: # %entry
21556; VLX-NEXT:    kmovd %edi, %k1
21557; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21558; VLX-NEXT:    kmovd %k0, %eax
21559; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21560; VLX-NEXT:    retq
21561;
21562; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21563; NoVLX:       # %bb.0: # %entry
21564; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21565; NoVLX-NEXT:    kmovw %edi, %k1
21566; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21567; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21568; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21569; NoVLX-NEXT:    kmovw %k0, %eax
21570; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21571; NoVLX-NEXT:    vzeroupper
21572; NoVLX-NEXT:    retq
21573entry:
21574  %0 = bitcast <2 x i64> %__a to <2 x double>
21575  %load = load double, ptr %__b
21576  %vec = insertelement <2 x double> undef, double %load, i32 0
21577  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21578  %2 = fcmp oeq <2 x double> %0, %1
21579  %3 = bitcast i2 %__u to <2 x i1>
21580  %4 = and <2 x i1> %2, %3
21581  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21582  %6 = bitcast <16 x i1> %5 to i16
21583  ret i16 %6
21584}
21585
21586
21587
; Unmasked register-register form, widened to 32 mask bits. The <2 x i1> result
; of `fcmp oeq` on two <2 x double> vectors is padded to <32 x i1> (every
; shuffle index above 1 selects a zeroinitializer lane) and returned as an i32
; bitmask.
; Expected output with AVX512VL is a 128-bit vcmpeqpd into %k0 read back with
; kmovd; without it the compare is done on %zmm registers and a
; kshiftlw/kshiftrw $14 pair leaves only the two low bits of the 16-bit mask.
21588define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21589; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21590; VLX:       # %bb.0: # %entry
21591; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21592; VLX-NEXT:    kmovd %k0, %eax
21593; VLX-NEXT:    retq
21594;
21595; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21596; NoVLX:       # %bb.0: # %entry
21597; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21598; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21599; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21600; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21601; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21602; NoVLX-NEXT:    kmovw %k0, %eax
21603; NoVLX-NEXT:    vzeroupper
21604; NoVLX-NEXT:    retq
21605entry:
21606  %0 = bitcast <2 x i64> %__a to <2 x double>
21607  %1 = bitcast <2 x i64> %__b to <2 x double>
21608  %2 = fcmp oeq <2 x double> %0, %1
21609  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21610  %4 = bitcast <32 x i1> %3 to i32
21611  ret i32 %4
21612}
21613
; Unmasked full-vector memory form, widened to 32 mask bits. The right-hand
; operand is loaded from %__b as <2 x i64>; the <2 x i1> `fcmp oeq` result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected output with AVX512VL folds the load into vcmpeqpd as a (%rdi)
; operand; without it a separate vmovapd loads %xmm1 before the %zmm compare and
; a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21614define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21615; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21616; VLX:       # %bb.0: # %entry
21617; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21618; VLX-NEXT:    kmovd %k0, %eax
21619; VLX-NEXT:    retq
21620;
21621; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21622; NoVLX:       # %bb.0: # %entry
21623; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21624; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21625; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21626; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21627; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21628; NoVLX-NEXT:    kmovw %k0, %eax
21629; NoVLX-NEXT:    vzeroupper
21630; NoVLX-NEXT:    retq
21631entry:
21632  %0 = bitcast <2 x i64> %__a to <2 x double>
21633  %load = load <2 x i64>, ptr %__b
21634  %1 = bitcast <2 x i64> %load to <2 x double>
21635  %2 = fcmp oeq <2 x double> %0, %1
21636  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21637  %4 = bitcast <32 x i1> %3 to i32
21638  ret i32 %4
21639}
21640
; Unmasked broadcast memory form, widened to 32 mask bits. A single double
; loaded from %__b is splatted to both lanes before the `fcmp oeq`; the
; <2 x i1> result is zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected output uses an embedded-broadcast operand: (%rdi){1to2} with
; AVX512VL, (%rdi){1to8} on the %zmm compare without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21641define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21642; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21643; VLX:       # %bb.0: # %entry
21644; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21645; VLX-NEXT:    kmovd %k0, %eax
21646; VLX-NEXT:    retq
21647;
21648; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21649; NoVLX:       # %bb.0: # %entry
21650; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21651; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21652; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21653; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21654; NoVLX-NEXT:    kmovw %k0, %eax
21655; NoVLX-NEXT:    vzeroupper
21656; NoVLX-NEXT:    retq
21657entry:
21658  %0 = bitcast <2 x i64> %__a to <2 x double>
21659  %load = load double, ptr %__b
21660  %vec = insertelement <2 x double> undef, double %load, i32 0
21661  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21662  %2 = fcmp oeq <2 x double> %0, %1
21663  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21664  %4 = bitcast <32 x i1> %3 to i32
21665  ret i32 %4
21666}
21667
; Masked register-register form, widened to 32 mask bits. The `fcmp oeq` result
; on two <2 x double> vectors is ANDed with the i2 mask %__u (bitcast to
; <2 x i1>), zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected output in both runs moves the mask from %edi into %k1 and applies it
; as a {%k1} write-mask; without AVX512VL the compare is done on %zmm registers
; and a kshiftlw/kshiftrw $14 pair leaves only the two low mask bits.
21668define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21669; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21670; VLX:       # %bb.0: # %entry
21671; VLX-NEXT:    kmovd %edi, %k1
21672; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21673; VLX-NEXT:    kmovd %k0, %eax
21674; VLX-NEXT:    retq
21675;
21676; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21677; NoVLX:       # %bb.0: # %entry
21678; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21679; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21680; NoVLX-NEXT:    kmovw %edi, %k1
21681; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21682; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21683; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21684; NoVLX-NEXT:    kmovw %k0, %eax
21685; NoVLX-NEXT:    vzeroupper
21686; NoVLX-NEXT:    retq
21687entry:
21688  %0 = bitcast <2 x i64> %__a to <2 x double>
21689  %1 = bitcast <2 x i64> %__b to <2 x double>
21690  %2 = fcmp oeq <2 x double> %0, %1
21691  %3 = bitcast i2 %__u to <2 x i1>
21692  %4 = and <2 x i1> %2, %3
21693  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21694  %6 = bitcast <32 x i1> %5 to i32
21695  ret i32 %6
21696}
21697
; Masked full-vector memory form, widened to 32 mask bits. The right-hand
; operand is loaded from %__b as <2 x i64>; the `fcmp oeq` result is ANDed with
; the i2 mask %__u, zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected output applies the mask as a {%k1} write-mask. With AVX512VL the load
; is folded into vcmpeqpd as a (%rsi) operand; without it a separate vmovapd
; loads %xmm1 and a kshiftlw/kshiftrw $14 pair leaves the two low mask bits.
21698define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21699; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21700; VLX:       # %bb.0: # %entry
21701; VLX-NEXT:    kmovd %edi, %k1
21702; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21703; VLX-NEXT:    kmovd %k0, %eax
21704; VLX-NEXT:    retq
21705;
21706; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21707; NoVLX:       # %bb.0: # %entry
21708; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21709; NoVLX-NEXT:    kmovw %edi, %k1
21710; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21711; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21712; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21713; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21714; NoVLX-NEXT:    kmovw %k0, %eax
21715; NoVLX-NEXT:    vzeroupper
21716; NoVLX-NEXT:    retq
21717entry:
21718  %0 = bitcast <2 x i64> %__a to <2 x double>
21719  %load = load <2 x i64>, ptr %__b
21720  %1 = bitcast <2 x i64> %load to <2 x double>
21721  %2 = fcmp oeq <2 x double> %0, %1
21722  %3 = bitcast i2 %__u to <2 x i1>
21723  %4 = and <2 x i1> %2, %3
21724  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21725  %6 = bitcast <32 x i1> %5 to i32
21726  ret i32 %6
21727}
21728
; Masked broadcast memory form, widened to 32 mask bits. A single double loaded
; from %__b is splatted to both lanes; the `fcmp oeq` result is ANDed with the
; i2 mask %__u, zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected output combines a {%k1} write-mask with an embedded-broadcast
; operand: (%rsi){1to2} with AVX512VL, (%rsi){1to8} without it, where a
; kshiftlw/kshiftrw $14 pair then leaves only the two low mask bits.
21729define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21730; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21731; VLX:       # %bb.0: # %entry
21732; VLX-NEXT:    kmovd %edi, %k1
21733; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21734; VLX-NEXT:    kmovd %k0, %eax
21735; VLX-NEXT:    retq
21736;
21737; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21738; NoVLX:       # %bb.0: # %entry
21739; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21740; NoVLX-NEXT:    kmovw %edi, %k1
21741; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21742; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21743; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21744; NoVLX-NEXT:    kmovw %k0, %eax
21745; NoVLX-NEXT:    vzeroupper
21746; NoVLX-NEXT:    retq
21747entry:
21748  %0 = bitcast <2 x i64> %__a to <2 x double>
21749  %load = load double, ptr %__b
21750  %vec = insertelement <2 x double> undef, double %load, i32 0
21751  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21752  %2 = fcmp oeq <2 x double> %0, %1
21753  %3 = bitcast i2 %__u to <2 x i1>
21754  %4 = and <2 x i1> %2, %3
21755  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21756  %6 = bitcast <32 x i1> %5 to i32
21757  ret i32 %6
21758}
21759
21760
21761
21762define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21763; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21764; VLX:       # %bb.0: # %entry
21765; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21766; VLX-NEXT:    kmovq %k0, %rax
21767; VLX-NEXT:    retq
21768;
21769; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21770; NoVLX:       # %bb.0: # %entry
21771; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21772; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21773; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21774; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21775; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21776; NoVLX-NEXT:    kmovw %k0, %eax
21777; NoVLX-NEXT:    vzeroupper
21778; NoVLX-NEXT:    retq
21779entry:
21780  %0 = bitcast <2 x i64> %__a to <2 x double>
21781  %1 = bitcast <2 x i64> %__b to <2 x double>
21782  %2 = fcmp oeq <2 x double> %0, %1
21783  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21784  %4 = bitcast <64 x i1> %3 to i64
21785  ret i64 %4
21786}
21787
21788define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21789; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21790; VLX:       # %bb.0: # %entry
21791; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21792; VLX-NEXT:    kmovq %k0, %rax
21793; VLX-NEXT:    retq
21794;
21795; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21796; NoVLX:       # %bb.0: # %entry
21797; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21798; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21799; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21800; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21801; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21802; NoVLX-NEXT:    kmovw %k0, %eax
21803; NoVLX-NEXT:    vzeroupper
21804; NoVLX-NEXT:    retq
21805entry:
21806  %0 = bitcast <2 x i64> %__a to <2 x double>
21807  %load = load <2 x i64>, ptr %__b
21808  %1 = bitcast <2 x i64> %load to <2 x double>
21809  %2 = fcmp oeq <2 x double> %0, %1
21810  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21811  %4 = bitcast <64 x i1> %3 to i64
21812  ret i64 %4
21813}
21814
21815define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21816; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21817; VLX:       # %bb.0: # %entry
21818; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21819; VLX-NEXT:    kmovq %k0, %rax
21820; VLX-NEXT:    retq
21821;
21822; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21823; NoVLX:       # %bb.0: # %entry
21824; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21825; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21826; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21827; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21828; NoVLX-NEXT:    kmovw %k0, %eax
21829; NoVLX-NEXT:    vzeroupper
21830; NoVLX-NEXT:    retq
21831entry:
21832  %0 = bitcast <2 x i64> %__a to <2 x double>
21833  %load = load double, ptr %__b
21834  %vec = insertelement <2 x double> undef, double %load, i32 0
21835  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21836  %2 = fcmp oeq <2 x double> %0, %1
21837  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21838  %4 = bitcast <64 x i1> %3 to i64
21839  ret i64 %4
21840}
21841
21842define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21843; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21844; VLX:       # %bb.0: # %entry
21845; VLX-NEXT:    kmovd %edi, %k1
21846; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21847; VLX-NEXT:    kmovq %k0, %rax
21848; VLX-NEXT:    retq
21849;
21850; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21851; NoVLX:       # %bb.0: # %entry
21852; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21853; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21854; NoVLX-NEXT:    kmovw %edi, %k1
21855; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21856; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21857; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21858; NoVLX-NEXT:    kmovw %k0, %eax
21859; NoVLX-NEXT:    vzeroupper
21860; NoVLX-NEXT:    retq
21861entry:
21862  %0 = bitcast <2 x i64> %__a to <2 x double>
21863  %1 = bitcast <2 x i64> %__b to <2 x double>
21864  %2 = fcmp oeq <2 x double> %0, %1
21865  %3 = bitcast i2 %__u to <2 x i1>
21866  %4 = and <2 x i1> %2, %3
21867  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21868  %6 = bitcast <64 x i1> %5 to i64
21869  ret i64 %6
21870}
21871
21872define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21873; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21874; VLX:       # %bb.0: # %entry
21875; VLX-NEXT:    kmovd %edi, %k1
21876; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21877; VLX-NEXT:    kmovq %k0, %rax
21878; VLX-NEXT:    retq
21879;
21880; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21881; NoVLX:       # %bb.0: # %entry
21882; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21883; NoVLX-NEXT:    kmovw %edi, %k1
21884; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21885; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21886; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21887; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21888; NoVLX-NEXT:    kmovw %k0, %eax
21889; NoVLX-NEXT:    vzeroupper
21890; NoVLX-NEXT:    retq
21891entry:
21892  %0 = bitcast <2 x i64> %__a to <2 x double>
21893  %load = load <2 x i64>, ptr %__b
21894  %1 = bitcast <2 x i64> %load to <2 x double>
21895  %2 = fcmp oeq <2 x double> %0, %1
21896  %3 = bitcast i2 %__u to <2 x i1>
21897  %4 = and <2 x i1> %2, %3
21898  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21899  %6 = bitcast <64 x i1> %5 to i64
21900  ret i64 %6
21901}
21902
21903define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21904; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21905; VLX:       # %bb.0: # %entry
21906; VLX-NEXT:    kmovd %edi, %k1
21907; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21908; VLX-NEXT:    kmovq %k0, %rax
21909; VLX-NEXT:    retq
21910;
21911; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21912; NoVLX:       # %bb.0: # %entry
21913; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21914; NoVLX-NEXT:    kmovw %edi, %k1
21915; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21916; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21917; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21918; NoVLX-NEXT:    kmovw %k0, %eax
21919; NoVLX-NEXT:    vzeroupper
21920; NoVLX-NEXT:    retq
21921entry:
21922  %0 = bitcast <2 x i64> %__a to <2 x double>
21923  %load = load double, ptr %__b
21924  %vec = insertelement <2 x double> undef, double %load, i32 0
21925  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21926  %2 = fcmp oeq <2 x double> %0, %1
21927  %3 = bitcast i2 %__u to <2 x i1>
21928  %4 = and <2 x i1> %2, %3
21929  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21930  %6 = bitcast <64 x i1> %5 to i64
21931  ret i64 %6
21932}
21933
21934
21935
; Unmasked register/register case: ordered-equal compare of two <4 x double>
; values (passed as <4 x i64>); the <4 x i1> result is widened with zero bits
; to <8 x i1> and returned as an i8 mask.
; The VLX run expects a 256-bit vcmpeqpd into %k0 followed by kmovd. The NoVLX
; run (AVX512F only) expects a zmm compare whose mask is trimmed to its low
; 4 bits by the kshiftlw/kshiftrw pair by 12.
21936define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
21937; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21938; VLX:       # %bb.0: # %entry
21939; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
21940; VLX-NEXT:    kmovd %k0, %eax
21941; VLX-NEXT:    # kill: def $al killed $al killed $eax
21942; VLX-NEXT:    vzeroupper
21943; VLX-NEXT:    retq
21944;
21945; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21946; NoVLX:       # %bb.0: # %entry
21947; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
21948; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
21949; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21950; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
21951; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
21952; NoVLX-NEXT:    kmovw %k0, %eax
21953; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21954; NoVLX-NEXT:    vzeroupper
21955; NoVLX-NEXT:    retq
21956entry:
21957  %0 = bitcast <4 x i64> %__a to <4 x double>
21958  %1 = bitcast <4 x i64> %__b to <4 x double>
21959  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
21960  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21961  %4 = bitcast <8 x i1> %3 to i8
21962  ret i8 %4
21963}
21964
; Unmasked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is widened
; with zero bits to <8 x i1> and returned as an i8 mask.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand. The
; NoVLX run expects a separate vmovapd load, a zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
21965define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
21966; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21967; VLX:       # %bb.0: # %entry
21968; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
21969; VLX-NEXT:    kmovd %k0, %eax
21970; VLX-NEXT:    # kill: def $al killed $al killed $eax
21971; VLX-NEXT:    vzeroupper
21972; VLX-NEXT:    retq
21973;
21974; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21975; NoVLX:       # %bb.0: # %entry
21976; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
21977; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
21978; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21979; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
21980; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
21981; NoVLX-NEXT:    kmovw %k0, %eax
21982; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21983; NoVLX-NEXT:    vzeroupper
21984; NoVLX-NEXT:    retq
21985entry:
21986  %0 = bitcast <4 x i64> %__a to <4 x double>
21987  %load = load <4 x i64>, ptr %__b
21988  %1 = bitcast <4 x i64> %load to <4 x double>
21989  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
21990  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21991  %4 = bitcast <8 x i1> %3 to i8
21992  ret i8 %4
21993}
21994
; Unmasked broadcast case: one double is loaded from %__b and splatted to
; <4 x double> (insertelement + all-zero shuffle mask) before the fcmp oeq.
; The <4 x i1> result is widened with zero bits to <8 x i1> and returned as
; an i8 mask.
; The VLX run expects an embedded-broadcast operand (%rdi){1to4}. The NoVLX
; run expects (%rdi){1to8} on a zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 that keeps only the low 4 mask bits.
21995define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
21996; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
21997; VLX:       # %bb.0: # %entry
21998; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
21999; VLX-NEXT:    kmovd %k0, %eax
22000; VLX-NEXT:    # kill: def $al killed $al killed $eax
22001; VLX-NEXT:    vzeroupper
22002; VLX-NEXT:    retq
22003;
22004; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22005; NoVLX:       # %bb.0: # %entry
22006; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22007; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22008; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22009; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22010; NoVLX-NEXT:    kmovw %k0, %eax
22011; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22012; NoVLX-NEXT:    vzeroupper
22013; NoVLX-NEXT:    retq
22014entry:
22015  %0 = bitcast <4 x i64> %__a to <4 x double>
22016  %load = load double, ptr %__b
22017  %vec = insertelement <4 x double> undef, double %load, i32 0
22018  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22019  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
22020  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22021  %4 = bitcast <8 x i1> %3 to i8
22022  ret i8 %4
22023}
22024
; Masked register/register case: the <4 x i1> fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened with zero bits to
; <8 x i1> and returned as an i8 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask on
; vcmpeqpd. The NoVLX run additionally expects a zmm compare and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22025define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22026; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22027; VLX:       # %bb.0: # %entry
22028; VLX-NEXT:    kmovd %edi, %k1
22029; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22030; VLX-NEXT:    kmovd %k0, %eax
22031; VLX-NEXT:    # kill: def $al killed $al killed $eax
22032; VLX-NEXT:    vzeroupper
22033; VLX-NEXT:    retq
22034;
22035; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22036; NoVLX:       # %bb.0: # %entry
22037; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22038; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22039; NoVLX-NEXT:    kmovw %edi, %k1
22040; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22041; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22042; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22043; NoVLX-NEXT:    kmovw %k0, %eax
22044; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22045; NoVLX-NEXT:    vzeroupper
22046; NoVLX-NEXT:    retq
22047entry:
22048  %0 = bitcast <4 x i64> %__a to <4 x double>
22049  %1 = bitcast <4 x i64> %__b to <4 x double>
22050  %2 = fcmp oeq <4 x double> %0, %1
22051  %3 = bitcast i4 %__u to <4 x i1>
22052  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
22053  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22054  %6 = bitcast <8 x i1> %5 to i8
22055  ret i8 %6
22056}
22057
; Masked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is ANDed with
; %__u (bitcast to <4 x i1>), widened with zero bits to <8 x i1> and returned
; as an i8 mask.
; The VLX run expects the load folded as (%rsi) under the {%k1} write-mask.
; The NoVLX run expects a separate vmovapd load, a masked zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22058define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22059; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22060; VLX:       # %bb.0: # %entry
22061; VLX-NEXT:    kmovd %edi, %k1
22062; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22063; VLX-NEXT:    kmovd %k0, %eax
22064; VLX-NEXT:    # kill: def $al killed $al killed $eax
22065; VLX-NEXT:    vzeroupper
22066; VLX-NEXT:    retq
22067;
22068; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22069; NoVLX:       # %bb.0: # %entry
22070; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22071; NoVLX-NEXT:    kmovw %edi, %k1
22072; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22073; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22074; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22075; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22076; NoVLX-NEXT:    kmovw %k0, %eax
22077; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22078; NoVLX-NEXT:    vzeroupper
22079; NoVLX-NEXT:    retq
22080entry:
22081  %0 = bitcast <4 x i64> %__a to <4 x double>
22082  %load = load <4 x i64>, ptr %__b
22083  %1 = bitcast <4 x i64> %load to <4 x double>
22084  %2 = fcmp oeq <4 x double> %0, %1
22085  %3 = bitcast i4 %__u to <4 x i1>
22086  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
22087  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22088  %6 = bitcast <8 x i1> %5 to i8
22089  ret i8 %6
22090}
22091
; Masked broadcast case: one double is loaded from %__b and splatted to
; <4 x double> (insertelement + all-zero shuffle mask) before the fcmp oeq.
; The <4 x i1> result is ANDed with %__u (bitcast to <4 x i1>), widened with
; zero bits to <8 x i1> and returned as an i8 mask.
; The VLX run expects (%rsi){1to4} under the {%k1} write-mask. The NoVLX run
; expects (%rsi){1to8} on a masked zmm compare followed by the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22092define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22093; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22094; VLX:       # %bb.0: # %entry
22095; VLX-NEXT:    kmovd %edi, %k1
22096; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22097; VLX-NEXT:    kmovd %k0, %eax
22098; VLX-NEXT:    # kill: def $al killed $al killed $eax
22099; VLX-NEXT:    vzeroupper
22100; VLX-NEXT:    retq
22101;
22102; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22103; NoVLX:       # %bb.0: # %entry
22104; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22105; NoVLX-NEXT:    kmovw %edi, %k1
22106; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22107; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22108; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22109; NoVLX-NEXT:    kmovw %k0, %eax
22110; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22111; NoVLX-NEXT:    vzeroupper
22112; NoVLX-NEXT:    retq
22113entry:
22114  %0 = bitcast <4 x i64> %__a to <4 x double>
22115  %load = load double, ptr %__b
22116  %vec = insertelement <4 x double> undef, double %load, i32 0
22117  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22118  %2 = fcmp oeq <4 x double> %0, %1
22119  %3 = bitcast i4 %__u to <4 x i1>
22120  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..7 are zero.
22121  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22122  %6 = bitcast <8 x i1> %5 to i8
22123  ret i8 %6
22124}
22125
22126
22127
; Unmasked register/register case: ordered-equal compare of two <4 x double>
; values (passed as <4 x i64>); the <4 x i1> result is widened with zero bits
; to <16 x i1> and returned as an i16 mask.
; The VLX run expects a 256-bit vcmpeqpd into %k0 followed by kmovd. The NoVLX
; run (AVX512F only) expects a zmm compare whose mask is trimmed to its low
; 4 bits by the kshiftlw/kshiftrw pair by 12.
22128define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22129; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22130; VLX:       # %bb.0: # %entry
22131; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22132; VLX-NEXT:    kmovd %k0, %eax
22133; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22134; VLX-NEXT:    vzeroupper
22135; VLX-NEXT:    retq
22136;
22137; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22138; NoVLX:       # %bb.0: # %entry
22139; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22140; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22141; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22142; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22143; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22144; NoVLX-NEXT:    kmovw %k0, %eax
22145; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22146; NoVLX-NEXT:    vzeroupper
22147; NoVLX-NEXT:    retq
22148entry:
22149  %0 = bitcast <4 x i64> %__a to <4 x double>
22150  %1 = bitcast <4 x i64> %__b to <4 x double>
22151  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22152  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22153  %4 = bitcast <16 x i1> %3 to i16
22154  ret i16 %4
22155}
22156
; Unmasked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is widened
; with zero bits to <16 x i1> and returned as an i16 mask.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand. The
; NoVLX run expects a separate vmovapd load, a zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22157define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22158; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22159; VLX:       # %bb.0: # %entry
22160; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22161; VLX-NEXT:    kmovd %k0, %eax
22162; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22163; VLX-NEXT:    vzeroupper
22164; VLX-NEXT:    retq
22165;
22166; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22167; NoVLX:       # %bb.0: # %entry
22168; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22169; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22170; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22171; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22172; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22173; NoVLX-NEXT:    kmovw %k0, %eax
22174; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22175; NoVLX-NEXT:    vzeroupper
22176; NoVLX-NEXT:    retq
22177entry:
22178  %0 = bitcast <4 x i64> %__a to <4 x double>
22179  %load = load <4 x i64>, ptr %__b
22180  %1 = bitcast <4 x i64> %load to <4 x double>
22181  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22182  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22183  %4 = bitcast <16 x i1> %3 to i16
22184  ret i16 %4
22185}
22186
; Unmasked broadcast case: one double is loaded from %__b and splatted to
; <4 x double> (insertelement + all-zero shuffle mask) before the fcmp oeq.
; The <4 x i1> result is widened with zero bits to <16 x i1> and returned as
; an i16 mask.
; The VLX run expects an embedded-broadcast operand (%rdi){1to4}. The NoVLX
; run expects (%rdi){1to8} on a zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 that keeps only the low 4 mask bits.
22187define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22188; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22189; VLX:       # %bb.0: # %entry
22190; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22191; VLX-NEXT:    kmovd %k0, %eax
22192; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22193; VLX-NEXT:    vzeroupper
22194; VLX-NEXT:    retq
22195;
22196; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22197; NoVLX:       # %bb.0: # %entry
22198; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22199; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22200; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22201; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22202; NoVLX-NEXT:    kmovw %k0, %eax
22203; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22204; NoVLX-NEXT:    vzeroupper
22205; NoVLX-NEXT:    retq
22206entry:
22207  %0 = bitcast <4 x i64> %__a to <4 x double>
22208  %load = load double, ptr %__b
22209  %vec = insertelement <4 x double> undef, double %load, i32 0
22210  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22211  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22212  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22213  %4 = bitcast <16 x i1> %3 to i16
22214  ret i16 %4
22215}
22216
; Masked register/register case: the <4 x i1> fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened with zero bits to
; <16 x i1> and returned as an i16 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask on
; vcmpeqpd. The NoVLX run additionally expects a zmm compare and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22217define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22218; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22219; VLX:       # %bb.0: # %entry
22220; VLX-NEXT:    kmovd %edi, %k1
22221; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22222; VLX-NEXT:    kmovd %k0, %eax
22223; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22224; VLX-NEXT:    vzeroupper
22225; VLX-NEXT:    retq
22226;
22227; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22228; NoVLX:       # %bb.0: # %entry
22229; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22230; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22231; NoVLX-NEXT:    kmovw %edi, %k1
22232; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22233; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22234; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22235; NoVLX-NEXT:    kmovw %k0, %eax
22236; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22237; NoVLX-NEXT:    vzeroupper
22238; NoVLX-NEXT:    retq
22239entry:
22240  %0 = bitcast <4 x i64> %__a to <4 x double>
22241  %1 = bitcast <4 x i64> %__b to <4 x double>
22242  %2 = fcmp oeq <4 x double> %0, %1
22243  %3 = bitcast i4 %__u to <4 x i1>
22244  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22245  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22246  %6 = bitcast <16 x i1> %5 to i16
22247  ret i16 %6
22248}
22249
; Masked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is ANDed with
; %__u (bitcast to <4 x i1>), widened with zero bits to <16 x i1> and returned
; as an i16 mask.
; The VLX run expects the load folded as (%rsi) under the {%k1} write-mask.
; The NoVLX run expects a separate vmovapd load, a masked zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22250define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22251; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22252; VLX:       # %bb.0: # %entry
22253; VLX-NEXT:    kmovd %edi, %k1
22254; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22255; VLX-NEXT:    kmovd %k0, %eax
22256; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22257; VLX-NEXT:    vzeroupper
22258; VLX-NEXT:    retq
22259;
22260; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22261; NoVLX:       # %bb.0: # %entry
22262; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22263; NoVLX-NEXT:    kmovw %edi, %k1
22264; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22265; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22266; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22267; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22268; NoVLX-NEXT:    kmovw %k0, %eax
22269; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22270; NoVLX-NEXT:    vzeroupper
22271; NoVLX-NEXT:    retq
22272entry:
22273  %0 = bitcast <4 x i64> %__a to <4 x double>
22274  %load = load <4 x i64>, ptr %__b
22275  %1 = bitcast <4 x i64> %load to <4 x double>
22276  %2 = fcmp oeq <4 x double> %0, %1
22277  %3 = bitcast i4 %__u to <4 x i1>
22278  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22279  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22280  %6 = bitcast <16 x i1> %5 to i16
22281  ret i16 %6
22282}
22283
; Masked broadcast case: one double is loaded from %__b and splatted to
; <4 x double> (insertelement + all-zero shuffle mask) before the fcmp oeq.
; The <4 x i1> result is ANDed with %__u (bitcast to <4 x i1>), widened with
; zero bits to <16 x i1> and returned as an i16 mask.
; The VLX run expects (%rsi){1to4} under the {%k1} write-mask. The NoVLX run
; expects (%rsi){1to8} on a masked zmm compare followed by the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22284define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22285; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22286; VLX:       # %bb.0: # %entry
22287; VLX-NEXT:    kmovd %edi, %k1
22288; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22289; VLX-NEXT:    kmovd %k0, %eax
22290; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22291; VLX-NEXT:    vzeroupper
22292; VLX-NEXT:    retq
22293;
22294; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22295; NoVLX:       # %bb.0: # %entry
22296; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22297; NoVLX-NEXT:    kmovw %edi, %k1
22298; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22299; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22300; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22301; NoVLX-NEXT:    kmovw %k0, %eax
22302; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22303; NoVLX-NEXT:    vzeroupper
22304; NoVLX-NEXT:    retq
22305entry:
22306  %0 = bitcast <4 x i64> %__a to <4 x double>
22307  %load = load double, ptr %__b
22308  %vec = insertelement <4 x double> undef, double %load, i32 0
22309  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22310  %2 = fcmp oeq <4 x double> %0, %1
22311  %3 = bitcast i4 %__u to <4 x i1>
22312  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..15 are zero.
22313  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22314  %6 = bitcast <16 x i1> %5 to i16
22315  ret i16 %6
22316}
22317
22318
22319
; Unmasked register/register case: ordered-equal compare of two <4 x double>
; values (passed as <4 x i64>); the <4 x i1> result is widened with zero bits
; to <32 x i1> and returned as an i32 mask.
; The VLX run expects a 256-bit vcmpeqpd into %k0 followed by kmovd. The NoVLX
; run (AVX512F only) expects a zmm compare whose mask is trimmed to its low
; 4 bits by the kshiftlw/kshiftrw pair by 12.
22320define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22321; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22322; VLX:       # %bb.0: # %entry
22323; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22324; VLX-NEXT:    kmovd %k0, %eax
22325; VLX-NEXT:    vzeroupper
22326; VLX-NEXT:    retq
22327;
22328; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22329; NoVLX:       # %bb.0: # %entry
22330; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22331; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22332; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22333; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22334; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22335; NoVLX-NEXT:    kmovw %k0, %eax
22336; NoVLX-NEXT:    vzeroupper
22337; NoVLX-NEXT:    retq
22338entry:
22339  %0 = bitcast <4 x i64> %__a to <4 x double>
22340  %1 = bitcast <4 x i64> %__b to <4 x double>
22341  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..31 are zero.
22342  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22343  %4 = bitcast <32 x i1> %3 to i32
22344  ret i32 %4
22345}
22346
; Unmasked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is widened
; with zero bits to <32 x i1> and returned as an i32 mask.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand. The
; NoVLX run expects a separate vmovapd load, a zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22347define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22348; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22349; VLX:       # %bb.0: # %entry
22350; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22351; VLX-NEXT:    kmovd %k0, %eax
22352; VLX-NEXT:    vzeroupper
22353; VLX-NEXT:    retq
22354;
22355; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22356; NoVLX:       # %bb.0: # %entry
22357; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22358; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22359; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22360; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22361; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22362; NoVLX-NEXT:    kmovw %k0, %eax
22363; NoVLX-NEXT:    vzeroupper
22364; NoVLX-NEXT:    retq
22365entry:
22366  %0 = bitcast <4 x i64> %__a to <4 x double>
22367  %load = load <4 x i64>, ptr %__b
22368  %1 = bitcast <4 x i64> %load to <4 x double>
22369  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..31 are zero.
22370  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22371  %4 = bitcast <32 x i1> %3 to i32
22372  ret i32 %4
22373}
22374
; Unmasked broadcast case: one double is loaded from %__b and splatted to
; <4 x double> (insertelement + all-zero shuffle mask) before the fcmp oeq.
; The <4 x i1> result is widened with zero bits to <32 x i1> and returned as
; an i32 mask.
; The VLX run expects an embedded-broadcast operand (%rdi){1to4}. The NoVLX
; run expects (%rdi){1to8} on a zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 that keeps only the low 4 mask bits.
22375define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22376; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22377; VLX:       # %bb.0: # %entry
22378; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22379; VLX-NEXT:    kmovd %k0, %eax
22380; VLX-NEXT:    vzeroupper
22381; VLX-NEXT:    retq
22382;
22383; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22384; NoVLX:       # %bb.0: # %entry
22385; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22386; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22387; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22388; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22389; NoVLX-NEXT:    kmovw %k0, %eax
22390; NoVLX-NEXT:    vzeroupper
22391; NoVLX-NEXT:    retq
22392entry:
22393  %0 = bitcast <4 x i64> %__a to <4 x double>
22394  %load = load double, ptr %__b
22395  %vec = insertelement <4 x double> undef, double %load, i32 0
22396  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22397  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..31 are zero.
22398  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22399  %4 = bitcast <32 x i1> %3 to i32
22400  ret i32 %4
22401}
22402
; Masked register/register case: the <4 x i1> fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened with zero bits to
; <32 x i1> and returned as an i32 mask.
; Both runs expect %edi moved into %k1 and used as the {%k1} write-mask on
; vcmpeqpd. The NoVLX run additionally expects a zmm compare and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22403define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22404; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22405; VLX:       # %bb.0: # %entry
22406; VLX-NEXT:    kmovd %edi, %k1
22407; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22408; VLX-NEXT:    kmovd %k0, %eax
22409; VLX-NEXT:    vzeroupper
22410; VLX-NEXT:    retq
22411;
22412; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22413; NoVLX:       # %bb.0: # %entry
22414; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22415; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22416; NoVLX-NEXT:    kmovw %edi, %k1
22417; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22418; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22419; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22420; NoVLX-NEXT:    kmovw %k0, %eax
22421; NoVLX-NEXT:    vzeroupper
22422; NoVLX-NEXT:    retq
22423entry:
22424  %0 = bitcast <4 x i64> %__a to <4 x double>
22425  %1 = bitcast <4 x i64> %__b to <4 x double>
22426  %2 = fcmp oeq <4 x double> %0, %1
22427  %3 = bitcast i4 %__u to <4 x i1>
22428  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..31 are zero.
22429  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22430  %6 = bitcast <32 x i1> %5 to i32
22431  ret i32 %6
22432}
22433
; Masked register/memory case: the second compare operand is loaded from
; %__b as a full <4 x i64> vector. The <4 x i1> fcmp oeq result is ANDed with
; %__u (bitcast to <4 x i1>), widened with zero bits to <32 x i1> and returned
; as an i32 mask.
; The VLX run expects the load folded as (%rsi) under the {%k1} write-mask.
; The NoVLX run expects a separate vmovapd load, a masked zmm compare, and the
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22434define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22435; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22436; VLX:       # %bb.0: # %entry
22437; VLX-NEXT:    kmovd %edi, %k1
22438; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22439; VLX-NEXT:    kmovd %k0, %eax
22440; VLX-NEXT:    vzeroupper
22441; VLX-NEXT:    retq
22442;
22443; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22444; NoVLX:       # %bb.0: # %entry
22445; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22446; NoVLX-NEXT:    kmovw %edi, %k1
22447; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22448; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22449; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22450; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22451; NoVLX-NEXT:    kmovw %k0, %eax
22452; NoVLX-NEXT:    vzeroupper
22453; NoVLX-NEXT:    retq
22454entry:
22455  %0 = bitcast <4 x i64> %__a to <4 x double>
22456  %load = load <4 x i64>, ptr %__b
22457  %1 = bitcast <4 x i64> %load to <4 x double>
22458  %2 = fcmp oeq <4 x double> %0, %1
22459  %3 = bitcast i4 %__u to <4 x i1>
22460  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result bits 4..31 are zero.
22461  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22462  %6 = bitcast <32 x i1> %5 to i32
22463  ret i32 %6
22464}
22465
; Broadcast form: a single double is loaded from %__b and splatted to all four
; lanes before the ordered-equal compare. The result is ANDed with the i4 mask
; %__u, widened to <32 x i1>, and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run uses an embedded {1to4} broadcast on the
; masked ymm compare; the AVX512F-only run uses {1to8} on a zmm compare and
; trims the mask to 4 bits with kshiftlw/kshiftrw $12.
22466define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22467; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22468; VLX:       # %bb.0: # %entry
22469; VLX-NEXT:    kmovd %edi, %k1
22470; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22471; VLX-NEXT:    kmovd %k0, %eax
22472; VLX-NEXT:    vzeroupper
22473; VLX-NEXT:    retq
22474;
22475; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22476; NoVLX:       # %bb.0: # %entry
22477; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22478; NoVLX-NEXT:    kmovw %edi, %k1
22479; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22480; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22481; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22482; NoVLX-NEXT:    kmovw %k0, %eax
22483; NoVLX-NEXT:    vzeroupper
22484; NoVLX-NEXT:    retq
22485entry:
22486  %0 = bitcast <4 x i64> %__a to <4 x double>
22487  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22488  %vec = insertelement <4 x double> undef, double %load, i32 0
22489  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22490  %2 = fcmp oeq <4 x double> %0, %1
22491  %3 = bitcast i4 %__u to <4 x i1>
22492  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-31 are always zero.
22493  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22494  %6 = bitcast <32 x i1> %5 to i32
22495  ret i32 %6
22496}
22497
22498
22499
; Unmasked ordered-equal compare of two <4 x double> values (passed as
; <4 x i64> and bitcast). The <4 x i1> result is widened to <64 x i1> and
; returned as an i64 bitmask.
; Expected codegen: the AVX512VL run uses a ymm vcmpeqpd and kmovq; the
; AVX512F-only run widens both operands to zmm, trims the mask to 4 bits with
; kshiftlw/kshiftrw $12, and reads it with kmovw.
22500define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22501; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22502; VLX:       # %bb.0: # %entry
22503; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22504; VLX-NEXT:    kmovq %k0, %rax
22505; VLX-NEXT:    vzeroupper
22506; VLX-NEXT:    retq
22507;
22508; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22509; NoVLX:       # %bb.0: # %entry
22510; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22511; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22512; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22513; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22514; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22515; NoVLX-NEXT:    kmovw %k0, %eax
22516; NoVLX-NEXT:    vzeroupper
22517; NoVLX-NEXT:    retq
22518entry:
22519  %0 = bitcast <4 x i64> %__a to <4 x double>
22520  %1 = bitcast <4 x i64> %__b to <4 x double>
22521  %2 = fcmp oeq <4 x double> %0, %1
  ; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22522  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22523  %4 = bitcast <64 x i1> %3 to i64
22524  ret i64 %4
22525}
22526
; Unmasked ordered-equal compare of %__a against a <4 x double> loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> and returned as an i64
; bitmask.
; Expected codegen: the AVX512VL run folds the load into a ymm vcmpeqpd; the
; AVX512F-only run loads into ymm1 with vmovapd, compares in zmm, and trims the
; mask to 4 bits with kshiftlw/kshiftrw $12.
22527define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22528; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22529; VLX:       # %bb.0: # %entry
22530; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22531; VLX-NEXT:    kmovq %k0, %rax
22532; VLX-NEXT:    vzeroupper
22533; VLX-NEXT:    retq
22534;
22535; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22536; NoVLX:       # %bb.0: # %entry
22537; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22538; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22539; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22540; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22541; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22542; NoVLX-NEXT:    kmovw %k0, %eax
22543; NoVLX-NEXT:    vzeroupper
22544; NoVLX-NEXT:    retq
22545entry:
22546  %0 = bitcast <4 x i64> %__a to <4 x double>
22547  %load = load <4 x i64>, ptr %__b
22548  %1 = bitcast <4 x i64> %load to <4 x double>
22549  %2 = fcmp oeq <4 x double> %0, %1
  ; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22550  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22551  %4 = bitcast <64 x i1> %3 to i64
22552  ret i64 %4
22553}
22554
; Unmasked broadcast form: a single double loaded from %__b is splatted to all
; four lanes and compared ordered-equal against %__a. The <4 x i1> result is
; widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run uses an embedded {1to4} broadcast on a ymm
; compare; the AVX512F-only run uses {1to8} on a zmm compare and trims the mask
; to 4 bits with kshiftlw/kshiftrw $12.
22555define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22556; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22557; VLX:       # %bb.0: # %entry
22558; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22559; VLX-NEXT:    kmovq %k0, %rax
22560; VLX-NEXT:    vzeroupper
22561; VLX-NEXT:    retq
22562;
22563; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22564; NoVLX:       # %bb.0: # %entry
22565; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22566; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22567; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22568; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22569; NoVLX-NEXT:    kmovw %k0, %eax
22570; NoVLX-NEXT:    vzeroupper
22571; NoVLX-NEXT:    retq
22572entry:
22573  %0 = bitcast <4 x i64> %__a to <4 x double>
22574  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22575  %vec = insertelement <4 x double> undef, double %load, i32 0
22576  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22577  %2 = fcmp oeq <4 x double> %0, %1
  ; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22578  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22579  %4 = bitcast <64 x i1> %3 to i64
22580  ret i64 %4
22581}
22582
; Masked ordered-equal compare of two <4 x double> values (passed as <4 x i64>
; and bitcast). The <4 x i1> result is ANDed with the i4 mask %__u, widened to
; <64 x i1>, and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run uses one {%k1}-masked ymm vcmpeqpd and
; kmovq; the AVX512F-only run widens both operands to zmm and trims the mask to
; 4 bits with kshiftlw/kshiftrw $12.
22583define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22584; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22585; VLX:       # %bb.0: # %entry
22586; VLX-NEXT:    kmovd %edi, %k1
22587; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22588; VLX-NEXT:    kmovq %k0, %rax
22589; VLX-NEXT:    vzeroupper
22590; VLX-NEXT:    retq
22591;
22592; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22593; NoVLX:       # %bb.0: # %entry
22594; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22595; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22596; NoVLX-NEXT:    kmovw %edi, %k1
22597; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22598; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22599; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22600; NoVLX-NEXT:    kmovw %k0, %eax
22601; NoVLX-NEXT:    vzeroupper
22602; NoVLX-NEXT:    retq
22603entry:
22604  %0 = bitcast <4 x i64> %__a to <4 x double>
22605  %1 = bitcast <4 x i64> %__b to <4 x double>
22606  %2 = fcmp oeq <4 x double> %0, %1
22607  %3 = bitcast i4 %__u to <4 x i1>
22608  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22609  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22610  %6 = bitcast <64 x i1> %5 to i64
22611  ret i64 %6
22612}
22613
; Masked ordered-equal compare of %__a against a <4 x double> loaded from %__b.
; The <4 x i1> result is ANDed with the i4 mask %__u, widened to <64 x i1>, and
; returned as an i64 bitmask.
; Expected codegen: the AVX512VL run folds the load into the masked ymm
; vcmpeqpd; the AVX512F-only run loads into ymm1 with vmovapd, compares in zmm,
; and trims the mask to 4 bits with kshiftlw/kshiftrw $12.
22614define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22615; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22616; VLX:       # %bb.0: # %entry
22617; VLX-NEXT:    kmovd %edi, %k1
22618; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22619; VLX-NEXT:    kmovq %k0, %rax
22620; VLX-NEXT:    vzeroupper
22621; VLX-NEXT:    retq
22622;
22623; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22624; NoVLX:       # %bb.0: # %entry
22625; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22626; NoVLX-NEXT:    kmovw %edi, %k1
22627; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22628; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22629; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22630; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22631; NoVLX-NEXT:    kmovw %k0, %eax
22632; NoVLX-NEXT:    vzeroupper
22633; NoVLX-NEXT:    retq
22634entry:
22635  %0 = bitcast <4 x i64> %__a to <4 x double>
22636  %load = load <4 x i64>, ptr %__b
22637  %1 = bitcast <4 x i64> %load to <4 x double>
22638  %2 = fcmp oeq <4 x double> %0, %1
22639  %3 = bitcast i4 %__u to <4 x i1>
22640  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22641  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22642  %6 = bitcast <64 x i1> %5 to i64
22643  ret i64 %6
22644}
22645
; Masked broadcast form: a single double loaded from %__b is splatted to all
; four lanes and compared ordered-equal against %__a. The result is ANDed with
; the i4 mask %__u, widened to <64 x i1>, and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run uses an embedded {1to4} broadcast on the
; masked ymm compare; the AVX512F-only run uses {1to8} on a zmm compare and
; trims the mask to 4 bits with kshiftlw/kshiftrw $12.
22646define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22647; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22648; VLX:       # %bb.0: # %entry
22649; VLX-NEXT:    kmovd %edi, %k1
22650; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22651; VLX-NEXT:    kmovq %k0, %rax
22652; VLX-NEXT:    vzeroupper
22653; VLX-NEXT:    retq
22654;
22655; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22656; NoVLX:       # %bb.0: # %entry
22657; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22658; NoVLX-NEXT:    kmovw %edi, %k1
22659; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22660; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22661; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22662; NoVLX-NEXT:    kmovw %k0, %eax
22663; NoVLX-NEXT:    vzeroupper
22664; NoVLX-NEXT:    retq
22665entry:
22666  %0 = bitcast <4 x i64> %__a to <4 x double>
22667  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22668  %vec = insertelement <4 x double> undef, double %load, i32 0
22669  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22670  %2 = fcmp oeq <4 x double> %0, %1
22671  %3 = bitcast i4 %__u to <4 x i1>
22672  %4 = and <4 x i1> %2, %3
  ; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result bits 4-63 are always zero.
22673  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22674  %6 = bitcast <64 x i1> %5 to i64
22675  ret i64 %6
22676}
22677
22678
22679
; Unmasked ordered-equal compare of two <8 x double> values (passed as
; <8 x i64> and bitcast). The <8 x i1> result is widened to <16 x i1> and
; returned as an i16 bitmask.
; Expected codegen: both runs use a zmm vcmpeqpd directly; they differ only in
; reading the mask register with kmovd versus kmovw.
22680define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22681; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22682; VLX:       # %bb.0: # %entry
22683; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22684; VLX-NEXT:    kmovd %k0, %eax
22685; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22686; VLX-NEXT:    vzeroupper
22687; VLX-NEXT:    retq
22688;
22689; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22690; NoVLX:       # %bb.0: # %entry
22691; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22692; NoVLX-NEXT:    kmovw %k0, %eax
22693; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22694; NoVLX-NEXT:    vzeroupper
22695; NoVLX-NEXT:    retq
22696entry:
22697  %0 = bitcast <8 x i64> %__a to <8 x double>
22698  %1 = bitcast <8 x i64> %__b to <8 x double>
22699  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-15 are always zero.
22700  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22701  %4 = bitcast <16 x i1> %3 to i16
22702  ret i16 %4
22703}
22704
; Unmasked ordered-equal compare of %__a against an <8 x double> loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> and returned as an i16
; bitmask.
; Expected codegen: both runs fold the load into a zmm vcmpeqpd; they differ
; only in reading the mask register with kmovd versus kmovw.
22705define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22706; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22707; VLX:       # %bb.0: # %entry
22708; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
22709; VLX-NEXT:    kmovd %k0, %eax
22710; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22711; VLX-NEXT:    vzeroupper
22712; VLX-NEXT:    retq
22713;
22714; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22715; NoVLX:       # %bb.0: # %entry
22716; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
22717; NoVLX-NEXT:    kmovw %k0, %eax
22718; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22719; NoVLX-NEXT:    vzeroupper
22720; NoVLX-NEXT:    retq
22721entry:
22722  %0 = bitcast <8 x i64> %__a to <8 x double>
22723  %load = load <8 x i64>, ptr %__b
22724  %1 = bitcast <8 x i64> %load to <8 x double>
22725  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-15 are always zero.
22726  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22727  %4 = bitcast <16 x i1> %3 to i16
22728  ret i16 %4
22729}
22730
; Unmasked broadcast form: a single double loaded from %__b is splatted to all
; eight lanes and compared ordered-equal against %__a. The <8 x i1> result is
; widened to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: both runs use an embedded {1to8} broadcast on a zmm
; vcmpeqpd.
22731define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22732; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22733; VLX:       # %bb.0: # %entry
22734; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22735; VLX-NEXT:    kmovd %k0, %eax
22736; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22737; VLX-NEXT:    vzeroupper
22738; VLX-NEXT:    retq
22739;
22740; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22741; NoVLX:       # %bb.0: # %entry
22742; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22743; NoVLX-NEXT:    kmovw %k0, %eax
22744; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22745; NoVLX-NEXT:    vzeroupper
22746; NoVLX-NEXT:    retq
22747entry:
22748  %0 = bitcast <8 x i64> %__a to <8 x double>
22749  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22750  %vec = insertelement <8 x double> undef, double %load, i32 0
22751  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22752  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-15 are always zero.
22753  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22754  %4 = bitcast <16 x i1> %3 to i16
22755  ret i16 %4
22756}
22757
; Masked ordered-equal compare of two <8 x double> values (passed as <8 x i64>
; and bitcast). The <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <16 x i1>, and returned as an i16 bitmask.
; Expected codegen: both runs move %__u into %k1 and use a {%k1}-masked zmm
; vcmpeqpd.
22758define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22759; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22760; VLX:       # %bb.0: # %entry
22761; VLX-NEXT:    kmovd %edi, %k1
22762; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22763; VLX-NEXT:    kmovd %k0, %eax
22764; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22765; VLX-NEXT:    vzeroupper
22766; VLX-NEXT:    retq
22767;
22768; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22769; NoVLX:       # %bb.0: # %entry
22770; NoVLX-NEXT:    kmovw %edi, %k1
22771; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22772; NoVLX-NEXT:    kmovw %k0, %eax
22773; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22774; NoVLX-NEXT:    vzeroupper
22775; NoVLX-NEXT:    retq
22776entry:
22777  %0 = bitcast <8 x i64> %__a to <8 x double>
22778  %1 = bitcast <8 x i64> %__b to <8 x double>
22779  %2 = fcmp oeq <8 x double> %0, %1
22780  %3 = bitcast i8 %__u to <8 x i1>
22781  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-15 are always zero.
22782  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22783  %6 = bitcast <16 x i1> %5 to i16
22784  ret i16 %6
22785}
22786
; Masked ordered-equal compare of %__a against an <8 x double> loaded from
; %__b. The <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <16 x i1>, and returned as an i16 bitmask.
; Expected codegen: both runs fold the load into a {%k1}-masked zmm vcmpeqpd.
22787define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
22788; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22789; VLX:       # %bb.0: # %entry
22790; VLX-NEXT:    kmovd %edi, %k1
22791; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22792; VLX-NEXT:    kmovd %k0, %eax
22793; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22794; VLX-NEXT:    vzeroupper
22795; VLX-NEXT:    retq
22796;
22797; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22798; NoVLX:       # %bb.0: # %entry
22799; NoVLX-NEXT:    kmovw %edi, %k1
22800; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22801; NoVLX-NEXT:    kmovw %k0, %eax
22802; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22803; NoVLX-NEXT:    vzeroupper
22804; NoVLX-NEXT:    retq
22805entry:
22806  %0 = bitcast <8 x i64> %__a to <8 x double>
22807  %load = load <8 x i64>, ptr %__b
22808  %1 = bitcast <8 x i64> %load to <8 x double>
22809  %2 = fcmp oeq <8 x double> %0, %1
22810  %3 = bitcast i8 %__u to <8 x i1>
22811  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-15 are always zero.
22812  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22813  %6 = bitcast <16 x i1> %5 to i16
22814  ret i16 %6
22815}
22816
; Masked broadcast form: a single double loaded from %__b is splatted to all
; eight lanes and compared ordered-equal against %__a. The result is ANDed with
; the i8 mask %__u, widened to <16 x i1>, and returned as an i16 bitmask.
; Expected codegen: both runs use an embedded {1to8} broadcast on a
; {%k1}-masked zmm vcmpeqpd.
22817define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
22818; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22819; VLX:       # %bb.0: # %entry
22820; VLX-NEXT:    kmovd %edi, %k1
22821; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22822; VLX-NEXT:    kmovd %k0, %eax
22823; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22824; VLX-NEXT:    vzeroupper
22825; VLX-NEXT:    retq
22826;
22827; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22828; NoVLX:       # %bb.0: # %entry
22829; NoVLX-NEXT:    kmovw %edi, %k1
22830; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22831; NoVLX-NEXT:    kmovw %k0, %eax
22832; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22833; NoVLX-NEXT:    vzeroupper
22834; NoVLX-NEXT:    retq
22835entry:
22836  %0 = bitcast <8 x i64> %__a to <8 x double>
22837  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22838  %vec = insertelement <8 x double> undef, double %load, i32 0
22839  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22840  %2 = fcmp oeq <8 x double> %0, %1
22841  %3 = bitcast i8 %__u to <8 x i1>
22842  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-15 are always zero.
22843  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22844  %6 = bitcast <16 x i1> %5 to i16
22845  ret i16 %6
22846}
22847
22848
22849
; Intrinsic-based compare: calls @llvm.x86.avx512.mask.cmp.pd.512 on two
; <8 x double> values with predicate operand i32 2, an all-true <8 x i1> mask,
; and a trailing i32 8 operand. The <8 x i1> result is bitcast to i8 and
; zero-extended to the i16 return value.
; Expected codegen: both runs emit vcmplepd {sae} followed by movzbl %al.
; NOTE(review): the function name says "oeq", but predicate 2 is checked as
; vcmplepd - presumably intentional in the generated test; confirm.
; NOTE(review): presumably the trailing i32 8 is what selects {sae}; confirm
; against the intrinsic definition.
22850define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22851; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22852; VLX:       # %bb.0: # %entry
22853; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
22854; VLX-NEXT:    kmovd %k0, %eax
22855; VLX-NEXT:    movzbl %al, %eax
22856; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22857; VLX-NEXT:    vzeroupper
22858; VLX-NEXT:    retq
22859;
22860; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22861; NoVLX:       # %bb.0: # %entry
22862; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
22863; NoVLX-NEXT:    kmovw %k0, %eax
22864; NoVLX-NEXT:    movzbl %al, %eax
22865; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22866; NoVLX-NEXT:    vzeroupper
22867; NoVLX-NEXT:    retq
22868entry:
22869  %0 = bitcast <8 x i64> %__a to <8 x double>
22870  %1 = bitcast <8 x i64> %__b to <8 x double>
22871  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  ; Widening here is a scalar zext of the 8-bit mask, not a shufflevector.
22872  %3 = bitcast <8 x i1> %2 to i8
22873  %4 = zext i8 %3 to i16
22874  ret i16 %4
22875}
22876
; Masked intrinsic-based compare: calls @llvm.x86.avx512.mask.cmp.pd.512 with
; predicate operand i32 2, an all-true <8 x i1> mask, and a trailing i32 8
; operand. The i8 mask %__u is applied afterwards with a separate `and`; the
; result is bitcast to i8 and zero-extended to the i16 return value.
; Expected codegen: both runs emit an unmasked vcmplepd {sae} and apply %__u
; with a scalar andb %dil, %al instead of a {%k1}-masked compare.
; NOTE(review): the function name says "oeq", but predicate 2 is checked as
; vcmplepd - presumably intentional in the generated test; confirm.
22877define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22878; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22879; VLX:       # %bb.0: # %entry
22880; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
22881; VLX-NEXT:    kmovd %k0, %eax
22882; VLX-NEXT:    andb %dil, %al
22883; VLX-NEXT:    movzbl %al, %eax
22884; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22885; VLX-NEXT:    vzeroupper
22886; VLX-NEXT:    retq
22887;
22888; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22889; NoVLX:       # %bb.0: # %entry
22890; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
22891; NoVLX-NEXT:    kmovw %k0, %eax
22892; NoVLX-NEXT:    andb %dil, %al
22893; NoVLX-NEXT:    movzbl %al, %eax
22894; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22895; NoVLX-NEXT:    vzeroupper
22896; NoVLX-NEXT:    retq
22897entry:
22898  %0 = bitcast <8 x i64> %__a to <8 x double>
22899  %1 = bitcast <8 x i64> %__b to <8 x double>
22900  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  ; The user mask is applied outside the intrinsic call.
22901  %3 = bitcast i8 %__u to <8 x i1>
22902  %4 = and <8 x i1> %2, %3
22903  %5 = bitcast <8 x i1> %4 to i8
22904  %6 = zext i8 %5 to i16
22905  ret i16 %6
22906}
22907
22908
22909
; Unmasked ordered-equal compare of two <8 x double> values (passed as
; <8 x i64> and bitcast). The <8 x i1> result is widened to <32 x i1> and
; returned as an i32 bitmask.
; Expected codegen: both runs use a zmm vcmpeqpd directly; they differ only in
; reading the mask register with kmovd versus kmovw.
22910define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22911; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22912; VLX:       # %bb.0: # %entry
22913; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22914; VLX-NEXT:    kmovd %k0, %eax
22915; VLX-NEXT:    vzeroupper
22916; VLX-NEXT:    retq
22917;
22918; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22919; NoVLX:       # %bb.0: # %entry
22920; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22921; NoVLX-NEXT:    kmovw %k0, %eax
22922; NoVLX-NEXT:    vzeroupper
22923; NoVLX-NEXT:    retq
22924entry:
22925  %0 = bitcast <8 x i64> %__a to <8 x double>
22926  %1 = bitcast <8 x i64> %__b to <8 x double>
22927  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-31 are always zero.
22928  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22929  %4 = bitcast <32 x i1> %3 to i32
22930  ret i32 %4
22931}
22932
; Unmasked ordered-equal compare of %__a against an <8 x double> loaded from
; %__b. The <8 x i1> result is widened to <32 x i1> and returned as an i32
; bitmask.
; Expected codegen: both runs fold the load into a zmm vcmpeqpd.
22933define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22934; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22935; VLX:       # %bb.0: # %entry
22936; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
22937; VLX-NEXT:    kmovd %k0, %eax
22938; VLX-NEXT:    vzeroupper
22939; VLX-NEXT:    retq
22940;
22941; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22942; NoVLX:       # %bb.0: # %entry
22943; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
22944; NoVLX-NEXT:    kmovw %k0, %eax
22945; NoVLX-NEXT:    vzeroupper
22946; NoVLX-NEXT:    retq
22947entry:
22948  %0 = bitcast <8 x i64> %__a to <8 x double>
22949  %load = load <8 x i64>, ptr %__b
22950  %1 = bitcast <8 x i64> %load to <8 x double>
22951  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-31 are always zero.
22952  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22953  %4 = bitcast <32 x i1> %3 to i32
22954  ret i32 %4
22955}
22956
; Unmasked broadcast form: a single double loaded from %__b is splatted to all
; eight lanes and compared ordered-equal against %__a. The <8 x i1> result is
; widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: both runs use an embedded {1to8} broadcast on a zmm
; vcmpeqpd.
22957define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22958; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22959; VLX:       # %bb.0: # %entry
22960; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22961; VLX-NEXT:    kmovd %k0, %eax
22962; VLX-NEXT:    vzeroupper
22963; VLX-NEXT:    retq
22964;
22965; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22966; NoVLX:       # %bb.0: # %entry
22967; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22968; NoVLX-NEXT:    kmovw %k0, %eax
22969; NoVLX-NEXT:    vzeroupper
22970; NoVLX-NEXT:    retq
22971entry:
22972  %0 = bitcast <8 x i64> %__a to <8 x double>
22973  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
22974  %vec = insertelement <8 x double> undef, double %load, i32 0
22975  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22976  %2 = fcmp oeq <8 x double> %0, %1
  ; Indices 0-7 select the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result bits 8-31 are always zero.
22977  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22978  %4 = bitcast <32 x i1> %3 to i32
22979  ret i32 %4
22980}
22981
; Masked ordered-equal compare of two <8 x double> values (passed as <8 x i64>
; and bitcast). The <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <32 x i1>, and returned as an i32 bitmask.
; Expected codegen: both runs move %__u into %k1 and use a {%k1}-masked zmm
; vcmpeqpd.
22982define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22983; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22984; VLX:       # %bb.0: # %entry
22985; VLX-NEXT:    kmovd %edi, %k1
22986; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22987; VLX-NEXT:    kmovd %k0, %eax
22988; VLX-NEXT:    vzeroupper
22989; VLX-NEXT:    retq
22990;
22991; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22992; NoVLX:       # %bb.0: # %entry
22993; NoVLX-NEXT:    kmovw %edi, %k1
22994; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22995; NoVLX-NEXT:    kmovw %k0, %eax
22996; NoVLX-NEXT:    vzeroupper
22997; NoVLX-NEXT:    retq
22998entry:
22999  %0 = bitcast <8 x i64> %__a to <8 x double>
23000  %1 = bitcast <8 x i64> %__b to <8 x double>
23001  %2 = fcmp oeq <8 x double> %0, %1
23002  %3 = bitcast i8 %__u to <8 x i1>
23003  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-31 are always zero.
23004  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23005  %6 = bitcast <32 x i1> %5 to i32
23006  ret i32 %6
23007}
23008
; Masked ordered-equal compare of %__a against an <8 x double> loaded from
; %__b. The <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <32 x i1>, and returned as an i32 bitmask.
; Expected codegen: both runs fold the load into a {%k1}-masked zmm vcmpeqpd.
23009define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23010; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23011; VLX:       # %bb.0: # %entry
23012; VLX-NEXT:    kmovd %edi, %k1
23013; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23014; VLX-NEXT:    kmovd %k0, %eax
23015; VLX-NEXT:    vzeroupper
23016; VLX-NEXT:    retq
23017;
23018; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23019; NoVLX:       # %bb.0: # %entry
23020; NoVLX-NEXT:    kmovw %edi, %k1
23021; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23022; NoVLX-NEXT:    kmovw %k0, %eax
23023; NoVLX-NEXT:    vzeroupper
23024; NoVLX-NEXT:    retq
23025entry:
23026  %0 = bitcast <8 x i64> %__a to <8 x double>
23027  %load = load <8 x i64>, ptr %__b
23028  %1 = bitcast <8 x i64> %load to <8 x double>
23029  %2 = fcmp oeq <8 x double> %0, %1
23030  %3 = bitcast i8 %__u to <8 x i1>
23031  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-31 are always zero.
23032  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23033  %6 = bitcast <32 x i1> %5 to i32
23034  ret i32 %6
23035}
23036
; Masked broadcast form: a single double loaded from %__b is splatted to all
; eight lanes and compared ordered-equal against %__a. The result is ANDed with
; the i8 mask %__u, widened to <32 x i1>, and returned as an i32 bitmask.
; Expected codegen: both runs use an embedded {1to8} broadcast on a
; {%k1}-masked zmm vcmpeqpd.
23037define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23038; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23039; VLX:       # %bb.0: # %entry
23040; VLX-NEXT:    kmovd %edi, %k1
23041; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23042; VLX-NEXT:    kmovd %k0, %eax
23043; VLX-NEXT:    vzeroupper
23044; VLX-NEXT:    retq
23045;
23046; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23047; NoVLX:       # %bb.0: # %entry
23048; NoVLX-NEXT:    kmovw %edi, %k1
23049; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23050; NoVLX-NEXT:    kmovw %k0, %eax
23051; NoVLX-NEXT:    vzeroupper
23052; NoVLX-NEXT:    retq
23053entry:
23054  %0 = bitcast <8 x i64> %__a to <8 x double>
23055  %load = load double, ptr %__b
  ; Insert the scalar into lane 0, then splat it with an all-zero shuffle mask.
23056  %vec = insertelement <8 x double> undef, double %load, i32 0
23057  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23058  %2 = fcmp oeq <8 x double> %0, %1
23059  %3 = bitcast i8 %__u to <8 x i1>
23060  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; indices 8-15 select lanes of
  ; the zeroinitializer operand, so result bits 8-31 are always zero.
23061  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23062  %6 = bitcast <32 x i1> %5 to i32
23063  ret i32 %6
23064}
23065
23066
23067
; Intrinsic-based compare: calls @llvm.x86.avx512.mask.cmp.pd.512 on two
; <8 x double> values with predicate operand i32 2, an all-true <8 x i1> mask,
; and a trailing i32 8 operand. The <8 x i1> result is bitcast to i8 and
; zero-extended to the i32 return value.
; Expected codegen: both runs emit vcmplepd {sae}. The AVX512VL run (which also
; enables avx512dq) reads the mask with a single kmovb; the AVX512F-only run
; uses kmovw followed by movzbl %al.
; NOTE(review): the function name says "oeq", but predicate 2 is checked as
; vcmplepd - presumably intentional in the generated test; confirm.
23068define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23069; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23070; VLX:       # %bb.0: # %entry
23071; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23072; VLX-NEXT:    kmovb %k0, %eax
23073; VLX-NEXT:    vzeroupper
23074; VLX-NEXT:    retq
23075;
23076; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23077; NoVLX:       # %bb.0: # %entry
23078; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23079; NoVLX-NEXT:    kmovw %k0, %eax
23080; NoVLX-NEXT:    movzbl %al, %eax
23081; NoVLX-NEXT:    vzeroupper
23082; NoVLX-NEXT:    retq
23083entry:
23084  %0 = bitcast <8 x i64> %__a to <8 x double>
23085  %1 = bitcast <8 x i64> %__b to <8 x double>
23086  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  ; Widening here is a scalar zext of the 8-bit mask, not a shufflevector.
23087  %3 = bitcast <8 x i1> %2 to i8
23088  %4 = zext i8 %3 to i32
23089  ret i32 %4
23090}
23091
23092define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; same intrinsic compare as the unmasked sae test, with the result ANDed against the i8 mask %__u before widening to i32
23093; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23094; VLX:       # %bb.0: # %entry
23095; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23096; VLX-NEXT:    kmovd %k0, %eax
23097; VLX-NEXT:    andb %dil, %al
23098; VLX-NEXT:    movzbl %al, %eax
23099; VLX-NEXT:    vzeroupper
23100; VLX-NEXT:    retq
23101;
23102; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23103; NoVLX:       # %bb.0: # %entry
23104; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23105; NoVLX-NEXT:    kmovw %k0, %eax
23106; NoVLX-NEXT:    andb %dil, %al
23107; NoVLX-NEXT:    movzbl %al, %eax
23108; NoVLX-NEXT:    vzeroupper
23109; NoVLX-NEXT:    retq
23110entry:
23111  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the first vector argument as 8 doubles
23112  %1 = bitcast <8 x i64> %__b to <8 x double> ; reinterpret the second vector argument as 8 doubles
23113  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) ; the intrinsic itself is called with an all-true mask; the assertions above expect vcmplepd {sae}. NOTE(review) - name says oeq, predicate immediate is 2
23114  %3 = bitcast i8 %__u to <8 x i1> ; view the scalar mask as one bit per lane
23115  %4 = and <8 x i1> %2, %3 ; apply the user mask in IR; the assertions above show this as a scalar andb after the mask move
23116  %5 = bitcast <8 x i1> %4 to i8 ; pack the masked lane results into one byte
23117  %6 = zext i8 %5 to i32 ; widen to the i32 return type with zero upper bits
23118  ret i32 %6
23119}
23120
23121
23122
23123define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; ordered-equal compare of two register operands; the 8 result bits are widened to a 64-bit mask with zero upper lanes
23124; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23125; VLX:       # %bb.0: # %entry
23126; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23127; VLX-NEXT:    kmovq %k0, %rax
23128; VLX-NEXT:    vzeroupper
23129; VLX-NEXT:    retq
23130;
23131; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23132; NoVLX:       # %bb.0: # %entry
23133; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23134; NoVLX-NEXT:    kmovw %k0, %eax
23135; NoVLX-NEXT:    vzeroupper
23136; NoVLX-NEXT:    retq
23137entry:
23138  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the first argument as 8 doubles
23139  %1 = bitcast <8 x i64> %__b to <8 x double> ; reinterpret the second argument as 8 doubles
23140  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23141  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23142  %4 = bitcast <64 x i1> %3 to i64 ; pack the 64 lanes into the i64 return value
23143  ret i64 %4
23144}
23145
23146define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { ; ordered-equal compare against a full vector loaded from %__b; result widened to a 64-bit mask with zero upper lanes
23147; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23148; VLX:       # %bb.0: # %entry
23149; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23150; VLX-NEXT:    kmovq %k0, %rax
23151; VLX-NEXT:    vzeroupper
23152; VLX-NEXT:    retq
23153;
23154; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23155; NoVLX:       # %bb.0: # %entry
23156; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23157; NoVLX-NEXT:    kmovw %k0, %eax
23158; NoVLX-NEXT:    vzeroupper
23159; NoVLX-NEXT:    retq
23160entry:
23161  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the register argument as 8 doubles
23162  %load = load <8 x i64>, ptr %__b ; whole-vector load; the assertions above expect it folded into the compare as a memory operand
23163  %1 = bitcast <8 x i64> %load to <8 x double> ; reinterpret the loaded vector as 8 doubles
23164  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23165  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23166  %4 = bitcast <64 x i1> %3 to i64 ; pack the 64 lanes into the i64 return value
23167  ret i64 %4
23168}
23169
23170define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { ; ordered-equal compare against one double loaded from %__b and splatted to all 8 lanes; result widened to a 64-bit mask
23171; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23172; VLX:       # %bb.0: # %entry
23173; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23174; VLX-NEXT:    kmovq %k0, %rax
23175; VLX-NEXT:    vzeroupper
23176; VLX-NEXT:    retq
23177;
23178; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23179; NoVLX:       # %bb.0: # %entry
23180; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23181; NoVLX-NEXT:    kmovw %k0, %eax
23182; NoVLX-NEXT:    vzeroupper
23183; NoVLX-NEXT:    retq
23184entry:
23185  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the register argument as 8 doubles
23186  %load = load double, ptr %__b ; scalar load of the value to broadcast
23187  %vec = insertelement <8 x double> undef, double %load, i32 0 ; place the scalar in lane 0
23188  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; splat lane 0 to all lanes; the assertions above expect a {1to8} broadcast memory operand
23189  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23190  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23191  %4 = bitcast <64 x i1> %3 to i64 ; pack the 64 lanes into the i64 return value
23192  ret i64 %4
23193}
23194
23195define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; ordered-equal compare of two register operands, ANDed with the i8 mask %__u, then widened to a 64-bit mask with zero upper lanes
23196; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23197; VLX:       # %bb.0: # %entry
23198; VLX-NEXT:    kmovd %edi, %k1
23199; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23200; VLX-NEXT:    kmovq %k0, %rax
23201; VLX-NEXT:    vzeroupper
23202; VLX-NEXT:    retq
23203;
23204; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23205; NoVLX:       # %bb.0: # %entry
23206; NoVLX-NEXT:    kmovw %edi, %k1
23207; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23208; NoVLX-NEXT:    kmovw %k0, %eax
23209; NoVLX-NEXT:    vzeroupper
23210; NoVLX-NEXT:    retq
23211entry:
23212  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the first vector argument as 8 doubles
23213  %1 = bitcast <8 x i64> %__b to <8 x double> ; reinterpret the second vector argument as 8 doubles
23214  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23215  %3 = bitcast i8 %__u to <8 x i1> ; view the scalar mask as one bit per lane
23216  %4 = and <8 x i1> %2, %3 ; apply the user mask; the assertions above expect it folded into the compare as a {%k1} write-mask
23217  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the masked compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23218  %6 = bitcast <64 x i1> %5 to i64 ; pack the 64 lanes into the i64 return value
23219  ret i64 %6
23220}
23221
23222define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { ; ordered-equal compare against a full vector loaded from %__b, ANDed with the i8 mask %__u, then widened to a 64-bit mask
23223; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23224; VLX:       # %bb.0: # %entry
23225; VLX-NEXT:    kmovd %edi, %k1
23226; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23227; VLX-NEXT:    kmovq %k0, %rax
23228; VLX-NEXT:    vzeroupper
23229; VLX-NEXT:    retq
23230;
23231; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23232; NoVLX:       # %bb.0: # %entry
23233; NoVLX-NEXT:    kmovw %edi, %k1
23234; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23235; NoVLX-NEXT:    kmovw %k0, %eax
23236; NoVLX-NEXT:    vzeroupper
23237; NoVLX-NEXT:    retq
23238entry:
23239  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the register argument as 8 doubles
23240  %load = load <8 x i64>, ptr %__b ; whole-vector load; the assertions above expect it folded into the compare as a memory operand
23241  %1 = bitcast <8 x i64> %load to <8 x double> ; reinterpret the loaded vector as 8 doubles
23242  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23243  %3 = bitcast i8 %__u to <8 x i1> ; view the scalar mask as one bit per lane
23244  %4 = and <8 x i1> %2, %3 ; apply the user mask; the assertions above expect it folded into the compare as a {%k1} write-mask
23245  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the masked compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23246  %6 = bitcast <64 x i1> %5 to i64 ; pack the 64 lanes into the i64 return value
23247  ret i64 %6
23248}
23249
23250define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { ; ordered-equal compare against one double loaded from %__b and splatted to all lanes, ANDed with the i8 mask %__u, then widened to a 64-bit mask
23251; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23252; VLX:       # %bb.0: # %entry
23253; VLX-NEXT:    kmovd %edi, %k1
23254; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23255; VLX-NEXT:    kmovq %k0, %rax
23256; VLX-NEXT:    vzeroupper
23257; VLX-NEXT:    retq
23258;
23259; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23260; NoVLX:       # %bb.0: # %entry
23261; NoVLX-NEXT:    kmovw %edi, %k1
23262; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23263; NoVLX-NEXT:    kmovw %k0, %eax
23264; NoVLX-NEXT:    vzeroupper
23265; NoVLX-NEXT:    retq
23266entry:
23267  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the register argument as 8 doubles
23268  %load = load double, ptr %__b ; scalar load of the value to broadcast
23269  %vec = insertelement <8 x double> undef, double %load, i32 0 ; place the scalar in lane 0
23270  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; splat lane 0 to all lanes; the assertions above expect a {1to8} broadcast memory operand
23271  %2 = fcmp oeq <8 x double> %0, %1 ; per-lane ordered-equal compare
23272  %3 = bitcast i8 %__u to <8 x i1> ; view the scalar mask as one bit per lane
23273  %4 = and <8 x i1> %2, %3 ; apply the user mask; the assertions above expect it folded into the compare as a {%k1} write-mask
23274  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; indices 0-7 pick the masked compare lanes; indices 8-15 pick the zeroinitializer operand, so lanes 8-63 are zero
23275  %6 = bitcast <64 x i1> %5 to i64 ; pack the 64 lanes into the i64 return value
23276  ret i64 %6
23277}
23278
23279
23280
23281define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; unmasked 8 x double compare through the mask.cmp.pd.512 intrinsic; the 8-bit result is zero-extended to i64
23282; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23283; VLX:       # %bb.0: # %entry
23284; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23285; VLX-NEXT:    kmovb %k0, %eax
23286; VLX-NEXT:    vzeroupper
23287; VLX-NEXT:    retq
23288;
23289; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23290; NoVLX:       # %bb.0: # %entry
23291; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23292; NoVLX-NEXT:    kmovw %k0, %eax
23293; NoVLX-NEXT:    movzbl %al, %eax
23294; NoVLX-NEXT:    vzeroupper
23295; NoVLX-NEXT:    retq
23296entry:
23297  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the first argument as 8 doubles
23298  %1 = bitcast <8 x i64> %__b to <8 x double> ; reinterpret the second argument as 8 doubles
23299  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) ; all-true mask operand; the assertions above expect vcmplepd {sae} for immediates 2 and 8. NOTE(review) - the function name says oeq but the predicate immediate is 2
23300  %3 = bitcast <8 x i1> %2 to i8 ; pack the 8 lane results into one byte
23301  %4 = zext i8 %3 to i64 ; widen to the i64 return type with zero upper bits
23302  ret i64 %4
23303}
23304
23305define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; same intrinsic compare as the unmasked sae test, with the result ANDed against the i8 mask %__u before widening to i64
23306; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23307; VLX:       # %bb.0: # %entry
23308; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23309; VLX-NEXT:    kmovd %k0, %eax
23310; VLX-NEXT:    andb %dil, %al
23311; VLX-NEXT:    movzbl %al, %eax
23312; VLX-NEXT:    vzeroupper
23313; VLX-NEXT:    retq
23314;
23315; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23316; NoVLX:       # %bb.0: # %entry
23317; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23318; NoVLX-NEXT:    kmovw %k0, %eax
23319; NoVLX-NEXT:    andb %dil, %al
23320; NoVLX-NEXT:    movzbl %al, %eax
23321; NoVLX-NEXT:    vzeroupper
23322; NoVLX-NEXT:    retq
23323entry:
23324  %0 = bitcast <8 x i64> %__a to <8 x double> ; reinterpret the first vector argument as 8 doubles
23325  %1 = bitcast <8 x i64> %__b to <8 x double> ; reinterpret the second vector argument as 8 doubles
23326  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) ; the intrinsic itself is called with an all-true mask; the assertions above expect vcmplepd {sae}. NOTE(review) - name says oeq, predicate immediate is 2
23327  %3 = bitcast i8 %__u to <8 x i1> ; view the scalar mask as one bit per lane
23328  %4 = and <8 x i1> %2, %3 ; apply the user mask in IR; the assertions above show this as a scalar andb after the mask move
23329  %5 = bitcast <8 x i1> %4 to i8 ; pack the masked lane results into one byte
23330  %6 = zext i8 %5 to i64 ; widen to the i64 return type with zero upper bits
23331  ret i64 %6
23332}
23333
23334; Test that we understand that cmpps with an SAE/rounding operand zeroes the upper bits of the mask register.
23335define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) { ; 16 x float intrinsic compare whose 16-bit result is widened to 32 bits by concatenating zero lanes
23336; VLX-LABEL: test_cmpm_rnd_zero:
23337; VLX:       # %bb.0:
23338; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
23339; VLX-NEXT:    kmovd %k0, %eax
23340; VLX-NEXT:    vzeroupper
23341; VLX-NEXT:    retq
23342;
23343; NoVLX-LABEL: test_cmpm_rnd_zero:
23344; NoVLX:       # %bb.0:
23345; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
23346; NoVLX-NEXT:    kmovw %k0, %eax
23347; NoVLX-NEXT:    vzeroupper
23348; NoVLX-NEXT:    retq
23349  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) ; all-true mask operand; the assertions above expect vcmpleps {sae} for immediates 2 and 8
23350  %1 = bitcast <16 x i1> %res to i16 ; pack the 16 lane results into an i16
23351  %cast = bitcast i16 %1 to <16 x i1> ; and unpack again - a round trip through the scalar type
23352  %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; identity concat - lanes 0-15 are the compare result, lanes 16-31 come from the zeroinitializer operand
23353  %cast2 = bitcast <32 x i1> %shuffle to i32 ; pack into the i32 return value; the assertions above expect only the compare and a mask move, with no separate zeroing step
23354  ret i32 %cast2
23355}
23356
23357define i8 @mask_zero_lower(<4 x i32> %a) { ; builds an 8-bit mask whose low 4 bits are zero and whose high 4 bits are the per-lane (a != 0) results
23358; VLX-LABEL: mask_zero_lower:
23359; VLX:       # %bb.0:
23360; VLX-NEXT:    vptestmd %xmm0, %xmm0, %k0
23361; VLX-NEXT:    kshiftlb $4, %k0, %k0
23362; VLX-NEXT:    kmovd %k0, %eax
23363; VLX-NEXT:    # kill: def $al killed $al killed $eax
23364; VLX-NEXT:    retq
23365;
23366; NoVLX-LABEL: mask_zero_lower:
23367; NoVLX:       # %bb.0:
23368; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
23369; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
23370; NoVLX-NEXT:    kshiftlw $4, %k0, %k0
23371; NoVLX-NEXT:    kmovw %k0, %eax
23372; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
23373; NoVLX-NEXT:    vzeroupper
23374; NoVLX-NEXT:    retq
23375  %cmp = icmp ne <4 x i32> %a, zeroinitializer ; per-lane test for a nonzero element
23376  %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> ; indices 4-7 pick the zeroinitializer operand for the low 4 lanes; indices 0-3 put the compare bits in the high 4 lanes. The assertions above expect a mask shift left by 4
23377  %cast = bitcast <8 x i1> %concat to i8 ; pack the 8 lanes into the i8 return value
23378  ret i8 %cast
23379}
23380