xref: /llvm-project/llvm/test/CodeGen/X86/kshift.ll (revision 3e4ee76fe0a015e306b58f0d8c1565f9f06ff9c3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefix=SKX
4
; Left shift of an 8-lane mask by one lane.
; %a is the lane-wise "%x == 0" mask. The shuffle puts a zero in lane 0
; (index 8 selects lane 0 of the all-zero second operand) and lanes 0..6 of
; %a in lanes 1..7. The shifted mask is then ANDed with the "%y == 0" mask;
; in the expected code that AND shows up as the {%k1} write-mask on the second
; vptestnmq. The expected shift is kshiftlw $1 for the avx512f-only run and
; kshiftlb $1 for the run that enables avx512dq.
5define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
6; KNL-LABEL: kshiftl_v8i1_1:
7; KNL:       # %bb.0:
8; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
9; KNL-NEXT:    kshiftlw $1, %k0, %k1
10; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
11; KNL-NEXT:    kmovw %k0, %eax
12; KNL-NEXT:    # kill: def $al killed $al killed $eax
13; KNL-NEXT:    vzeroupper
14; KNL-NEXT:    retq
15;
16; SKX-LABEL: kshiftl_v8i1_1:
17; SKX:       # %bb.0:
18; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
19; SKX-NEXT:    kshiftlb $1, %k0, %k1
20; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
21; SKX-NEXT:    kmovd %k0, %eax
22; SKX-NEXT:    # kill: def $al killed $al killed $eax
23; SKX-NEXT:    vzeroupper
24; SKX-NEXT:    retq
25  %a = icmp eq <8 x i64> %x, zeroinitializer
26  %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
27  %c = icmp eq <8 x i64> %y, zeroinitializer
28  %d = and <8 x i1> %b, %c
29  %e = bitcast <8 x i1> %d to i8
30  ret i8 %e
31}
32
; Left shift of a 16-lane mask by one lane.
; The shuffle puts a zero in lane 0 (index 16 selects lane 0 of the all-zero
; second operand) and lanes 0..14 of the "%x == 0" mask in lanes 1..15; the
; result is ANDed with the "%y == 0" mask. Both runs are expected to emit a
; single kshiftlw $1 and to fold the AND into the {%k1} write-mask.
33define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
34; KNL-LABEL: kshiftl_v16i1_1:
35; KNL:       # %bb.0:
36; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
37; KNL-NEXT:    kshiftlw $1, %k0, %k1
38; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
39; KNL-NEXT:    kmovw %k0, %eax
40; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
41; KNL-NEXT:    vzeroupper
42; KNL-NEXT:    retq
43;
44; SKX-LABEL: kshiftl_v16i1_1:
45; SKX:       # %bb.0:
46; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
47; SKX-NEXT:    kshiftlw $1, %k0, %k1
48; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
49; SKX-NEXT:    kmovd %k0, %eax
50; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
51; SKX-NEXT:    vzeroupper
52; SKX-NEXT:    retq
53  %a = icmp eq <16 x i32> %x, zeroinitializer
54  %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
55  %c = icmp eq <16 x i32> %y, zeroinitializer
56  %d = and <16 x i1> %b, %c
57  %e = bitcast <16 x i1> %d to i16
58  ret i16 %e
59}
60
; Left shift of a 32-lane mask by one lane.
; The shuffle puts a zero in lane 0 (index 32 selects lane 0 of the all-zero
; second operand) and lanes 0..30 of the "%x == 0" mask in lanes 1..31; the
; result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftld $1. With only avx512f
; the expected code works on two 16-lane halves: the low half is shifted with
; kshiftlw $1, the high half is built with valignd (lane 15 of the low half
; followed by lanes 0..14 of the high half) on vector copies of the masks, and
; the two 16-bit results are merged with shll $16 / orl.
61define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
62; KNL-LABEL: kshiftl_v32i1_1:
63; KNL:       # %bb.0:
64; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
65; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
66; KNL-NEXT:    vpcmpeqw %ymm3, %ymm2, %ymm2
67; KNL-NEXT:    vpmovsxwd %ymm2, %zmm2
68; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
69; KNL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
70; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
71; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k2
72; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
73; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
74; KNL-NEXT:    valignd {{.*#+}} zmm0 = zmm0[15],zmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
75; KNL-NEXT:    kshiftlw $1, %k2, %k1
76; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
77; KNL-NEXT:    vpcmpeqw %ymm3, %ymm2, %ymm2
78; KNL-NEXT:    vpmovsxwd %ymm2, %zmm2
79; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k2
80; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
81; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
82; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
83; KNL-NEXT:    kmovw %k0, %ecx
84; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
85; KNL-NEXT:    kmovw %k0, %eax
86; KNL-NEXT:    shll $16, %eax
87; KNL-NEXT:    orl %ecx, %eax
88; KNL-NEXT:    vzeroupper
89; KNL-NEXT:    retq
90;
91; SKX-LABEL: kshiftl_v32i1_1:
92; SKX:       # %bb.0:
93; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
94; SKX-NEXT:    kshiftld $1, %k0, %k1
95; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k0 {%k1}
96; SKX-NEXT:    kmovd %k0, %eax
97; SKX-NEXT:    vzeroupper
98; SKX-NEXT:    retq
99  %a = icmp eq <32 x i16> %x, zeroinitializer
100  %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
101  %c = icmp eq <32 x i16> %y, zeroinitializer
102  %d = and <32 x i1> %b, %c
103  %e = bitcast <32 x i1> %d to i32
104  ret i32 %e
105}
106
; Left shift of a 64-lane mask by one lane.
; The shuffle puts a zero in lane 0 (index 64 selects lane 0 of the all-zero
; second operand) and lanes 0..62 of the "%x == 0" mask in lanes 1..63; the
; result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftlq $1. With only avx512f
; the expected code works on four 16-lane pieces: the lowest piece is shifted
; with kshiftlw $1, the other three are built with valignd (lane 15 of the
; previous piece followed by lanes 0..14 of the current one), and the four
; 16-bit results are merged with shll $16 / orl / shlq $32 / orq.
107define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
108; KNL-LABEL: kshiftl_v64i1_1:
109; KNL:       # %bb.0:
110; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
111; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm3
112; KNL-NEXT:    vpmovsxbd %xmm3, %zmm4
113; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k1
114; KNL-NEXT:    vextracti128 $1, %ymm3, %xmm3
115; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
116; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k2
117; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
118; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
119; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
120; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
121; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k3
122; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
123; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k4
124; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
125; KNL-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
126; KNL-NEXT:    valignd {{.*#+}} zmm3 = zmm0[15],zmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
127; KNL-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
128; KNL-NEXT:    valignd {{.*#+}} zmm0 = zmm4[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
129; KNL-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
130; KNL-NEXT:    valignd {{.*#+}} zmm4 = zmm5[15],zmm4[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
131; KNL-NEXT:    kshiftlw $1, %k1, %k3
132; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
133; KNL-NEXT:    vpcmpeqb %ymm2, %ymm5, %ymm5
134; KNL-NEXT:    vextracti128 $1, %ymm5, %xmm6
135; KNL-NEXT:    vpmovsxbd %xmm6, %zmm6
136; KNL-NEXT:    vptestmd %zmm6, %zmm6, %k1
137; KNL-NEXT:    vpmovsxbd %xmm5, %zmm5
138; KNL-NEXT:    vptestmd %zmm5, %zmm5, %k2
139; KNL-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
140; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
141; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
142; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k4
143; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
144; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k3}
145; KNL-NEXT:    kmovw %k0, %eax
146; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0 {%k4}
147; KNL-NEXT:    kmovw %k0, %ecx
148; KNL-NEXT:    shll $16, %ecx
149; KNL-NEXT:    orl %eax, %ecx
150; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
151; KNL-NEXT:    kmovw %k0, %edx
152; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0 {%k1}
153; KNL-NEXT:    kmovw %k0, %eax
154; KNL-NEXT:    shll $16, %eax
155; KNL-NEXT:    orl %edx, %eax
156; KNL-NEXT:    shlq $32, %rax
157; KNL-NEXT:    orq %rcx, %rax
158; KNL-NEXT:    vzeroupper
159; KNL-NEXT:    retq
160;
161; SKX-LABEL: kshiftl_v64i1_1:
162; SKX:       # %bb.0:
163; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
164; SKX-NEXT:    kshiftlq $1, %k0, %k1
165; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k0 {%k1}
166; SKX-NEXT:    kmovq %k0, %rax
167; SKX-NEXT:    vzeroupper
168; SKX-NEXT:    retq
169  %a = icmp eq <64 x i8> %x, zeroinitializer
170  %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
171  %c = icmp eq <64 x i8> %y, zeroinitializer
172  %d = and <64 x i1> %b, %c
173  %e = bitcast <64 x i1> %d to i64
174  ret i64 %e
175}
176
; Left shift of an 8-lane mask by seven lanes (the maximum for this width).
; The all-zero vector is the FIRST shuffle operand here: indices 1..7 select
; zero lanes and index 8 selects lane 0 of the "%x == 0" mask, so only lane 7
; of the result can be set. The result is ANDed with the "%y == 0" mask.
; Expected shift is kshiftlw $7 for the avx512f-only run and kshiftlb $7 for
; the run that enables avx512dq.
177define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
178; KNL-LABEL: kshiftl_v8i1_7:
179; KNL:       # %bb.0:
180; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
181; KNL-NEXT:    kshiftlw $7, %k0, %k1
182; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
183; KNL-NEXT:    kmovw %k0, %eax
184; KNL-NEXT:    # kill: def $al killed $al killed $eax
185; KNL-NEXT:    vzeroupper
186; KNL-NEXT:    retq
187;
188; SKX-LABEL: kshiftl_v8i1_7:
189; SKX:       # %bb.0:
190; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
191; SKX-NEXT:    kshiftlb $7, %k0, %k1
192; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
193; SKX-NEXT:    kmovd %k0, %eax
194; SKX-NEXT:    # kill: def $al killed $al killed $eax
195; SKX-NEXT:    vzeroupper
196; SKX-NEXT:    retq
197  %a = icmp eq <8 x i64> %x, zeroinitializer
198  %b = shufflevector <8 x i1> zeroinitializer, <8 x i1> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
199  %c = icmp eq <8 x i64> %y, zeroinitializer
200  %d = and <8 x i1> %b, %c
201  %e = bitcast <8 x i1> %d to i8
202  ret i8 %e
203}
204
; Left shift of a 16-lane mask by fifteen lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: indices 1..15 select zero
; lanes and index 16 selects lane 0 of the "%x == 0" mask, so only lane 15 of
; the result can be set. The result is ANDed with the "%y == 0" mask.
; Both runs are expected to emit a single kshiftlw $15.
205define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
206; KNL-LABEL: kshiftl_v16i1_15:
207; KNL:       # %bb.0:
208; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
209; KNL-NEXT:    kshiftlw $15, %k0, %k1
210; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
211; KNL-NEXT:    kmovw %k0, %eax
212; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
213; KNL-NEXT:    vzeroupper
214; KNL-NEXT:    retq
215;
216; SKX-LABEL: kshiftl_v16i1_15:
217; SKX:       # %bb.0:
218; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
219; SKX-NEXT:    kshiftlw $15, %k0, %k1
220; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
221; SKX-NEXT:    kmovd %k0, %eax
222; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
223; SKX-NEXT:    vzeroupper
224; SKX-NEXT:    retq
225  %a = icmp eq <16 x i32> %x, zeroinitializer
226  %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
227  %c = icmp eq <16 x i32> %y, zeroinitializer
228  %d = and <16 x i1> %b, %c
229  %e = bitcast <16 x i1> %d to i16
230  ret i16 %e
231}
232
; Left shift of a 32-lane mask by thirty-one lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: indices 1..31 select zero
; lanes and index 32 selects lane 0 of the "%x == 0" mask, so only lane 31 of
; the result can be set. The result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftld $31. With only avx512f
; the expected code compares just the low half of %x, moves its lane 0 to
; lane 15 with kshiftlw $15, masks the compare of the high half of %y with it,
; and places the 16-bit result in the upper half of eax with shll $16.
233define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
234; KNL-LABEL: kshiftl_v32i1_31:
235; KNL:       # %bb.0:
236; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
237; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
238; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
239; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
240; KNL-NEXT:    kshiftlw $15, %k0, %k1
241; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
242; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
243; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
244; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
245; KNL-NEXT:    kmovw %k0, %eax
246; KNL-NEXT:    shll $16, %eax
247; KNL-NEXT:    vzeroupper
248; KNL-NEXT:    retq
249;
250; SKX-LABEL: kshiftl_v32i1_31:
251; SKX:       # %bb.0:
252; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
253; SKX-NEXT:    kshiftld $31, %k0, %k1
254; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k0 {%k1}
255; SKX-NEXT:    kmovd %k0, %eax
256; SKX-NEXT:    vzeroupper
257; SKX-NEXT:    retq
258  %a = icmp eq <32 x i16> %x, zeroinitializer
259  %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
260  %c = icmp eq <32 x i16> %y, zeroinitializer
261  %d = and <32 x i1> %b, %c
262  %e = bitcast <32 x i1> %d to i32
263  ret i32 %e
264}
265
; Left shift of a 64-lane mask by sixty-three lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: indices 1..63 select zero
; lanes and index 64 selects lane 0 of the "%x == 0" mask, so only lane 63 of
; the result can be set. The result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftlq $63. With only avx512f
; the expected code compares just the low 16 bytes of %x, moves lane 0 to
; lane 15 with kshiftlw $15, masks the compare of the top 16 bytes of %y
; (vextracti32x4 $3) with it, and places the 16-bit result in the top of rax
; with movzwl / shlq $48.
266define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
267; KNL-LABEL: kshiftl_v64i1_63:
268; KNL:       # %bb.0:
269; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
270; KNL-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
271; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
272; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
273; KNL-NEXT:    kshiftlw $15, %k0, %k1
274; KNL-NEXT:    vextracti32x4 $3, %zmm1, %xmm0
275; KNL-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
276; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
277; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
278; KNL-NEXT:    kmovw %k0, %eax
279; KNL-NEXT:    movzwl %ax, %eax
280; KNL-NEXT:    shlq $48, %rax
281; KNL-NEXT:    vzeroupper
282; KNL-NEXT:    retq
283;
284; SKX-LABEL: kshiftl_v64i1_63:
285; SKX:       # %bb.0:
286; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
287; SKX-NEXT:    kshiftlq $63, %k0, %k1
288; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k0 {%k1}
289; SKX-NEXT:    kmovq %k0, %rax
290; SKX-NEXT:    vzeroupper
291; SKX-NEXT:    retq
292  %a = icmp eq <64 x i8> %x, zeroinitializer
293  %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
294  %c = icmp eq <64 x i8> %y, zeroinitializer
295  %d = and <64 x i1> %b, %c
296  %e = bitcast <64 x i1> %d to i64
297  ret i64 %e
298}
299
; Right shift of an 8-lane mask by one lane.
; The shuffle puts lanes 1..7 of the "%x == 0" mask in lanes 0..6 and a zero
; in lane 7 (index 8 selects lane 0 of the all-zero second operand); the
; result is ANDed with the "%y == 0" mask.
; The run that enables avx512dq expects a single kshiftrb $1. The avx512f-only
; run expects word-sized shifts instead: kshiftlw $8 moves the eight lanes to
; the top of the 16-bit mask and kshiftrw $9 brings them back down one lane
; lower, which leaves lane 7 zero.
300define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
301; KNL-LABEL: kshiftr_v8i1_1:
302; KNL:       # %bb.0:
303; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
304; KNL-NEXT:    kshiftlw $8, %k0, %k0
305; KNL-NEXT:    kshiftrw $9, %k0, %k1
306; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
307; KNL-NEXT:    kmovw %k0, %eax
308; KNL-NEXT:    # kill: def $al killed $al killed $eax
309; KNL-NEXT:    vzeroupper
310; KNL-NEXT:    retq
311;
312; SKX-LABEL: kshiftr_v8i1_1:
313; SKX:       # %bb.0:
314; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
315; SKX-NEXT:    kshiftrb $1, %k0, %k1
316; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
317; SKX-NEXT:    kmovd %k0, %eax
318; SKX-NEXT:    # kill: def $al killed $al killed $eax
319; SKX-NEXT:    vzeroupper
320; SKX-NEXT:    retq
321  %a = icmp eq <8 x i64> %x, zeroinitializer
322  %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
323  %c = icmp eq <8 x i64> %y, zeroinitializer
324  %d = and <8 x i1> %b, %c
325  %e = bitcast <8 x i1> %d to i8
326  ret i8 %e
327}
328
; Right shift of a 16-lane mask by one lane.
; The shuffle puts lanes 1..15 of the "%x == 0" mask in lanes 0..14 and a zero
; in lane 15 (index 16 selects lane 0 of the all-zero second operand); the
; result is ANDed with the "%y == 0" mask.
; Both runs are expected to emit a single kshiftrw $1.
329define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
330; KNL-LABEL: kshiftr_v16i1_1:
331; KNL:       # %bb.0:
332; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
333; KNL-NEXT:    kshiftrw $1, %k0, %k1
334; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
335; KNL-NEXT:    kmovw %k0, %eax
336; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
337; KNL-NEXT:    vzeroupper
338; KNL-NEXT:    retq
339;
340; SKX-LABEL: kshiftr_v16i1_1:
341; SKX:       # %bb.0:
342; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
343; SKX-NEXT:    kshiftrw $1, %k0, %k1
344; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
345; SKX-NEXT:    kmovd %k0, %eax
346; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
347; SKX-NEXT:    vzeroupper
348; SKX-NEXT:    retq
349  %a = icmp eq <16 x i32> %x, zeroinitializer
350  %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
351  %c = icmp eq <16 x i32> %y, zeroinitializer
352  %d = and <16 x i1> %b, %c
353  %e = bitcast <16 x i1> %d to i16
354  ret i16 %e
355}
356
; Right shift of a 32-lane mask by one lane.
; The shuffle puts lanes 1..31 of the "%x == 0" mask in lanes 0..30 and a zero
; in lane 31 (index 32 selects lane 0 of the all-zero second operand); the
; result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftrd $1. With only avx512f
; the expected code works on two 16-lane halves: the high half is shifted with
; kshiftrw $1, the low half is built with valignd (lanes 1..15 of the low half
; followed by lane 0 of the high half) on vector copies of the masks, and the
; two 16-bit results are merged with shll $16 / orl.
357define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
358; KNL-LABEL: kshiftr_v32i1_1:
359; KNL:       # %bb.0:
360; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
361; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
362; KNL-NEXT:    vpcmpeqw %ymm3, %ymm2, %ymm2
363; KNL-NEXT:    vpmovsxwd %ymm2, %zmm2
364; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
365; KNL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
366; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
367; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k2
368; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
369; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
370; KNL-NEXT:    valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm2[0]
371; KNL-NEXT:    kshiftrw $1, %k1, %k1
372; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm2
373; KNL-NEXT:    vpmovsxwd %ymm2, %zmm2
374; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k2
375; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
376; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
377; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
378; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
379; KNL-NEXT:    kmovw %k0, %ecx
380; KNL-NEXT:    shll $16, %ecx
381; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
382; KNL-NEXT:    kmovw %k0, %eax
383; KNL-NEXT:    orl %ecx, %eax
384; KNL-NEXT:    vzeroupper
385; KNL-NEXT:    retq
386;
387; SKX-LABEL: kshiftr_v32i1_1:
388; SKX:       # %bb.0:
389; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
390; SKX-NEXT:    kshiftrd $1, %k0, %k1
391; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k0 {%k1}
392; SKX-NEXT:    kmovd %k0, %eax
393; SKX-NEXT:    vzeroupper
394; SKX-NEXT:    retq
395  %a = icmp eq <32 x i16> %x, zeroinitializer
396  %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
397  %c = icmp eq <32 x i16> %y, zeroinitializer
398  %d = and <32 x i1> %b, %c
399  %e = bitcast <32 x i1> %d to i32
400  ret i32 %e
401}
402
; Right shift of a 64-lane mask by one lane.
; The shuffle puts lanes 1..63 of the "%x == 0" mask in lanes 0..62 and a zero
; in lane 63 (index 64 selects lane 0 of the all-zero second operand); the
; result is ANDed with the "%y == 0" mask.
; With avx512bw the expected code is a single kshiftrq $1. With only avx512f
; the expected code works on four 16-lane pieces: the highest piece is shifted
; with kshiftrw $1, the other three are built with valignd (lanes 1..15 of the
; current piece followed by lane 0 of the next one), and the four 16-bit
; results are merged with shll $16 / orl / shlq $32 / orq.
403define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
404; KNL-LABEL: kshiftr_v64i1_1:
405; KNL:       # %bb.0:
406; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
407; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
408; KNL-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm3
409; KNL-NEXT:    vextracti128 $1, %ymm3, %xmm4
410; KNL-NEXT:    vpmovsxbd %xmm4, %zmm4
411; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k1
412; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
413; KNL-NEXT:    vpmovsxbd %xmm0, %zmm4
414; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k2
415; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
416; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k3
417; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
418; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
419; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k4
420; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
421; KNL-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
422; KNL-NEXT:    valignd {{.*#+}} zmm4 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm3[0]
423; KNL-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
424; KNL-NEXT:    valignd {{.*#+}} zmm0 = zmm5[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0]
425; KNL-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
426; KNL-NEXT:    valignd {{.*#+}} zmm3 = zmm3[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm5[0]
427; KNL-NEXT:    kshiftrw $1, %k1, %k3
428; KNL-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm5
429; KNL-NEXT:    vextracti128 $1, %ymm5, %xmm6
430; KNL-NEXT:    vpmovsxbd %xmm6, %zmm6
431; KNL-NEXT:    vptestmd %zmm6, %zmm6, %k1
432; KNL-NEXT:    vpmovsxbd %xmm5, %zmm5
433; KNL-NEXT:    vptestmd %zmm5, %zmm5, %k2
434; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
435; KNL-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
436; KNL-NEXT:    vpmovsxbd %xmm1, %zmm2
437; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k4
438; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
439; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
440; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k3}
441; KNL-NEXT:    kmovw %k0, %eax
442; KNL-NEXT:    shll $16, %eax
443; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0 {%k4}
444; KNL-NEXT:    kmovw %k0, %ecx
445; KNL-NEXT:    orl %eax, %ecx
446; KNL-NEXT:    shlq $32, %rcx
447; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
448; KNL-NEXT:    kmovw %k0, %edx
449; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0 {%k1}
450; KNL-NEXT:    kmovw %k0, %eax
451; KNL-NEXT:    shll $16, %eax
452; KNL-NEXT:    orl %edx, %eax
453; KNL-NEXT:    orq %rcx, %rax
454; KNL-NEXT:    vzeroupper
455; KNL-NEXT:    retq
456;
457; SKX-LABEL: kshiftr_v64i1_1:
458; SKX:       # %bb.0:
459; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
460; SKX-NEXT:    kshiftrq $1, %k0, %k1
461; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k0 {%k1}
462; SKX-NEXT:    kmovq %k0, %rax
463; SKX-NEXT:    vzeroupper
464; SKX-NEXT:    retq
465  %a = icmp eq <64 x i8> %x, zeroinitializer
466  %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
467  %c = icmp eq <64 x i8> %y, zeroinitializer
468  %d = and <64 x i1> %b, %c
469  %e = bitcast <64 x i1> %d to i64
470  ret i64 %e
471}
472
; NOTE(review): despite the "kshiftr ... _7" name, this function does not
; exercise a right shift by seven. The "%x == 0" mask %a is the FIRST shuffle
; operand here, so index 15 selects lane 7 of the all-zero second operand and
; indices 0..6 select lanes 0..6 of %a. That is the lane pattern of a left
; shift by one, and both runs are checked for kshiftlw $1 / kshiftlb $1.
; The sibling tests kshiftr_v16i1_15, kshiftr_v32i1_31 and kshiftr_v64i1_63
; pass zeroinitializer first and %a second. If a right shift by seven was
; intended, swap the two shuffle operands and regenerate the assertions with
; utils/update_llc_test_checks.py - TODO confirm intent before changing.
473define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
474; KNL-LABEL: kshiftr_v8i1_7:
475; KNL:       # %bb.0:
476; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
477; KNL-NEXT:    kshiftlw $1, %k0, %k1
478; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
479; KNL-NEXT:    kmovw %k0, %eax
480; KNL-NEXT:    # kill: def $al killed $al killed $eax
481; KNL-NEXT:    vzeroupper
482; KNL-NEXT:    retq
483;
484; SKX-LABEL: kshiftr_v8i1_7:
485; SKX:       # %bb.0:
486; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
487; SKX-NEXT:    kshiftlb $1, %k0, %k1
488; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
489; SKX-NEXT:    kmovd %k0, %eax
490; SKX-NEXT:    # kill: def $al killed $al killed $eax
491; SKX-NEXT:    vzeroupper
492; SKX-NEXT:    retq
493  %a = icmp eq <8 x i64> %x, zeroinitializer
494  %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
495  %c = icmp eq <8 x i64> %y, zeroinitializer
496  %d = and <8 x i1> %b, %c
497  %e = bitcast <8 x i1> %d to i8
498  ret i8 %e
499}
500
; Right shift of a 16-lane mask by fifteen lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: index 31 selects lane 15
; of the "%x == 0" mask for lane 0 and indices 0..14 select zero lanes, so
; only lane 0 of the result can be set. The result is ANDed with the
; "%y == 0" mask. Both runs are expected to emit a single kshiftrw $15.
501define i16 @kshiftr_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
502; KNL-LABEL: kshiftr_v16i1_15:
503; KNL:       # %bb.0:
504; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
505; KNL-NEXT:    kshiftrw $15, %k0, %k1
506; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
507; KNL-NEXT:    kmovw %k0, %eax
508; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
509; KNL-NEXT:    vzeroupper
510; KNL-NEXT:    retq
511;
512; SKX-LABEL: kshiftr_v16i1_15:
513; SKX:       # %bb.0:
514; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
515; SKX-NEXT:    kshiftrw $15, %k0, %k1
516; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k0 {%k1}
517; SKX-NEXT:    kmovd %k0, %eax
518; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
519; SKX-NEXT:    vzeroupper
520; SKX-NEXT:    retq
521  %a = icmp eq <16 x i32> %x, zeroinitializer
522  %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
523  %c = icmp eq <16 x i32> %y, zeroinitializer
524  %d = and <16 x i1> %b, %c
525  %e = bitcast <16 x i1> %d to i16
526  ret i16 %e
527}
528
; Right shift of a 32-lane mask by thirty-one lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: index 63 selects lane 31
; of the "%x == 0" mask for lane 0 and indices 0..30 select zero lanes, so
; only lane 0 of the result can be set. The result is ANDed with the
; "%y == 0" mask.
; With avx512bw the expected code is a single kshiftrd $31. With only avx512f
; the expected code compares just the high half of %x, moves its lane 15 to
; lane 0 with kshiftrw $15, and masks the compare of the low half of %y with
; it; the 16-bit result is returned as-is in eax.
529define i32 @kshiftr_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
530; KNL-LABEL: kshiftr_v32i1_31:
531; KNL:       # %bb.0:
532; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
533; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
534; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
535; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
536; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
537; KNL-NEXT:    kshiftrw $15, %k0, %k1
538; KNL-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm0
539; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
540; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
541; KNL-NEXT:    kmovw %k0, %eax
542; KNL-NEXT:    vzeroupper
543; KNL-NEXT:    retq
544;
545; SKX-LABEL: kshiftr_v32i1_31:
546; SKX:       # %bb.0:
547; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
548; SKX-NEXT:    kshiftrd $31, %k0, %k1
549; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k0 {%k1}
550; SKX-NEXT:    kmovd %k0, %eax
551; SKX-NEXT:    vzeroupper
552; SKX-NEXT:    retq
553  %a = icmp eq <32 x i16> %x, zeroinitializer
554  %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 63, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
555  %c = icmp eq <32 x i16> %y, zeroinitializer
556  %d = and <32 x i1> %b, %c
557  %e = bitcast <32 x i1> %d to i32
558  ret i32 %e
559}
560
; Right shift of a 64-lane mask by sixty-three lanes (the maximum for this width).
; The all-zero vector is the first shuffle operand: index 127 selects lane 63
; of the "%x == 0" mask for lane 0 and indices 0..62 select zero lanes, so
; only lane 0 of the result can be set. The result is ANDed with the
; "%y == 0" mask.
; With avx512bw the expected code is a single kshiftrq $63. With only avx512f
; the expected code compares just the top 16 bytes of %x (vextracti32x4 $3),
; moves lane 15 to lane 0 with kshiftrw $15, and masks the compare of the low
; 16 bytes of %y with it; the 16-bit result is returned as-is.
561define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
562; KNL-LABEL: kshiftr_v64i1_63:
563; KNL:       # %bb.0:
564; KNL-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
565; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
566; KNL-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
567; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
568; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
569; KNL-NEXT:    kshiftrw $15, %k0, %k1
570; KNL-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm0
571; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
572; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
573; KNL-NEXT:    kmovw %k0, %eax
574; KNL-NEXT:    vzeroupper
575; KNL-NEXT:    retq
576;
577; SKX-LABEL: kshiftr_v64i1_63:
578; SKX:       # %bb.0:
579; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
580; SKX-NEXT:    kshiftrq $63, %k0, %k1
581; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k0 {%k1}
582; SKX-NEXT:    kmovq %k0, %rax
583; SKX-NEXT:    vzeroupper
584; SKX-NEXT:    retq
585  %a = icmp eq <64 x i8> %x, zeroinitializer
586  %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 127, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
587  %c = icmp eq <64 x i8> %y, zeroinitializer
588  %d = and <64 x i1> %b, %c
589  %e = bitcast <64 x i1> %d to i64
590  ret i64 %e
591}
592
; Left shift of an 8-lane mask by one lane, with undef lanes in the shuffle mask.
; Same lane pattern as a shift-left-by-one shuffle (zero from index 8 in lane
; 0, then lanes of the "%x == 0" mask moved up by one), except that result
; lanes 1 and 5 are undef. The result is ANDed with the "%y == 0" mask.
; Both runs are still expected to emit a single shift by one: kshiftlw $1 with
; only avx512f, kshiftlb $1 with avx512dq.
593define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
594; KNL-LABEL: kshiftl_v8i1_zu123u56:
595; KNL:       # %bb.0:
596; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
597; KNL-NEXT:    kshiftlw $1, %k0, %k1
598; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
599; KNL-NEXT:    kmovw %k0, %eax
600; KNL-NEXT:    # kill: def $al killed $al killed $eax
601; KNL-NEXT:    vzeroupper
602; KNL-NEXT:    retq
603;
604; SKX-LABEL: kshiftl_v8i1_zu123u56:
605; SKX:       # %bb.0:
606; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
607; SKX-NEXT:    kshiftlb $1, %k0, %k1
608; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
609; SKX-NEXT:    kmovd %k0, %eax
610; SKX-NEXT:    # kill: def $al killed $al killed $eax
611; SKX-NEXT:    vzeroupper
612; SKX-NEXT:    retq
613  %a = icmp eq <8 x i64> %x, zeroinitializer
614  %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6>
615  %c = icmp eq <8 x i64> %y, zeroinitializer
616  %d = and <8 x i1> %b, %c
617  %e = bitcast <8 x i1> %d to i8
618  ret i8 %e
619}
620
; Left shift of an 8-lane mask by one lane where the shifted-in lane is undef.
; The second shuffle operand is undef and lane 0 of the shuffle mask is undef;
; lanes 1..7 take lanes 0..6 of the "%x == 0" mask. The result is ANDed with
; the "%y == 0" mask.
; Both runs are expected to emit a single shift by one: kshiftlw $1 with only
; avx512f, kshiftlb $1 with avx512dq.
621define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
622; KNL-LABEL: kshiftl_v8i1_u0123456:
623; KNL:       # %bb.0:
624; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
625; KNL-NEXT:    kshiftlw $1, %k0, %k1
626; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
627; KNL-NEXT:    kmovw %k0, %eax
628; KNL-NEXT:    # kill: def $al killed $al killed $eax
629; KNL-NEXT:    vzeroupper
630; KNL-NEXT:    retq
631;
632; SKX-LABEL: kshiftl_v8i1_u0123456:
633; SKX:       # %bb.0:
634; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
635; SKX-NEXT:    kshiftlb $1, %k0, %k1
636; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
637; SKX-NEXT:    kmovd %k0, %eax
638; SKX-NEXT:    # kill: def $al killed $al killed $eax
639; SKX-NEXT:    vzeroupper
640; SKX-NEXT:    retq
641  %a = icmp eq <8 x i64> %x, zeroinitializer
642  %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
643  %c = icmp eq <8 x i64> %y, zeroinitializer
644  %d = and <8 x i1> %b, %c
645  %e = bitcast <8 x i1> %d to i8
646  ret i8 %e
647}
648
; Right shift of an 8-lane mask by one lane, with undef lanes in the shuffle mask.
; Same lane pattern as a shift-right-by-one shuffle (lanes of the "%x == 0"
; mask moved down by one, zero from index 8 in lane 7), except that result
; lanes 1 and 3 are undef. The result is ANDed with the "%y == 0" mask.
; The run that enables avx512dq expects a single kshiftrb $1; the avx512f-only
; run expects kshiftlw $8 followed by kshiftrw $9, which leaves lane 7 zero.
649define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
650; KNL-LABEL: kshiftr_v8i1_1u3u567z:
651; KNL:       # %bb.0:
652; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
653; KNL-NEXT:    kshiftlw $8, %k0, %k0
654; KNL-NEXT:    kshiftrw $9, %k0, %k1
655; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
656; KNL-NEXT:    kmovw %k0, %eax
657; KNL-NEXT:    # kill: def $al killed $al killed $eax
658; KNL-NEXT:    vzeroupper
659; KNL-NEXT:    retq
660;
661; SKX-LABEL: kshiftr_v8i1_1u3u567z:
662; SKX:       # %bb.0:
663; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
664; SKX-NEXT:    kshiftrb $1, %k0, %k1
665; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
666; SKX-NEXT:    kmovd %k0, %eax
667; SKX-NEXT:    # kill: def $al killed $al killed $eax
668; SKX-NEXT:    vzeroupper
669; SKX-NEXT:    retq
670  %a = icmp eq <8 x i64> %x, zeroinitializer
671  %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 8>
672  %c = icmp eq <8 x i64> %y, zeroinitializer
673  %d = and <8 x i1> %b, %c
674  %e = bitcast <8 x i1> %d to i8
675  ret i8 %e
676}
677
; Right shift of an 8-lane mask by two lanes where the shifted-in lanes are undef.
; Lanes 0..5 take lanes 2..7 of the "%x == 0" mask; lanes 6 and 7 use indices
; 8 and 10, which select from the undef second operand. The result is ANDed
; with the "%y == 0" mask.
; The run that enables avx512dq expects kshiftrb $2. The avx512f-only run
; expects a lone kshiftrw $2, with no kshiftlw $8 in front of it (compare the
; expected code of kshiftr_v8i1_1, where the shifted-in lane must be zero).
678define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
679; KNL-LABEL: kshiftr_v8i1_234567uu:
680; KNL:       # %bb.0:
681; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
682; KNL-NEXT:    kshiftrw $2, %k0, %k1
683; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
684; KNL-NEXT:    kmovw %k0, %eax
685; KNL-NEXT:    # kill: def $al killed $al killed $eax
686; KNL-NEXT:    vzeroupper
687; KNL-NEXT:    retq
688;
689; SKX-LABEL: kshiftr_v8i1_234567uu:
690; SKX:       # %bb.0:
691; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
692; SKX-NEXT:    kshiftrb $2, %k0, %k1
693; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
694; SKX-NEXT:    kmovd %k0, %eax
695; SKX-NEXT:    # kill: def $al killed $al killed $eax
696; SKX-NEXT:    vzeroupper
697; SKX-NEXT:    retq
698  %a = icmp eq <8 x i64> %x, zeroinitializer
699  %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10>
700  %c = icmp eq <8 x i64> %y, zeroinitializer
701  %d = and <8 x i1> %b, %c
702  %e = bitcast <8 x i1> %d to i8
703  ret i8 %e
704}
705