; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm3
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm3, %zmm2
; AVX512DQ-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsraw $2, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsraw $1, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm6, %ymm5, %ymm3
; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm4, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpsraw $4, %zmm2, %zmm3
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm3
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> poison, <8 x i32> zeroinitializer
  %shift = ashr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %shift = ashr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %shift = ashr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm2, %zmm2
; AVX512BW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm1 = zmm2 ^ (zmm1 & zmm0)
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %shift = ashr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Uniform Variable Modulo Shifts
;

define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
  %splat = shufflevector <8 x i64> %mod, <8 x i64> poison, <8 x i32> zeroinitializer
  %shift = ashr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT:    vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %splat = shufflevector <16 x i32> %mod, <16 x i32> poison, <16 x i32> zeroinitializer
  %shift = ashr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT:    vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %splat = shufflevector <32 x i16> %mod, <32 x i16> poison, <32 x i32> zeroinitializer
  %shift = ashr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm2, %zmm2
; AVX512BW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm1 = zmm2 ^ (zmm1 & zmm0)
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
  %mod = and <64 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %splat = shufflevector <64 x i8> %mod, <64 x i8> poison, <64 x i32> zeroinitializer
  %shift = ashr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm1
; AVX512DQ-NEXT:    vpmovsxbd {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <32 x i16> @constant_shift_v32i16_pairs(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16_pairs:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [128,128,128,128,64,64,64,64,32,32,32,32,16,16,16,16,8,8,8,8,4,4,4,4,2,2,2,2,1,1,1,1]
; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
; AVX512DQ-NEXT:    vpsubw %ymm1, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16_pairs:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 9, i16 9, i16 9, i16 9, i16 10, i16 10, i16 10, i16 10, i16 11, i16 11, i16 11, i16 11, i16 12, i16 12, i16 12, i16 12, i16 13, i16 13, i16 13, i16 13, i16 14, i16 14, i16 14, i16 14, i16 15, i16 15, i16 15, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm3 = [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256]
; AVX512DQ-NEXT:    # ymm3 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2]
; AVX512DQ-NEXT:    # ymm4 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpsraw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpsraw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

define <64 x i8> @constant_shift_v64i8_pairs(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8_pairs:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [1024,1024,16384,16384,1024,4096,4096,2048,1024,32768,8192,16384,4096,512,2048,u]
; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; AVX512DQ-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,2,2,2,32,32,32,32,2,2,8,8,8,8,4,4,2,2,64,64,16,16,32,32,8,8,1,1,4,4,128,128]
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [512,32768,u,512,4096,u,32768,8192,32768,4096,4096,8192,1024,1024,2048,1024]
; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX512DQ-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,64,64,128,128,1,1,8,8,128,128,64,64,16,16,64,64,8,8,8,8,16,16,2,2,2,2,4,4,2,2]
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8_pairs:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,2,2,32,32,32,32,2,2,8,8,8,8,4,4,2,2,64,64,16,16,32,32,8,8,1,1,4,4,128,128,1,1,64,64,128,128,1,1,8,8,128,128,64,64,16,16,64,64,8,8,8,8,16,16,2,2,2,2,4,4,2,2]
; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 6, i8 6, i8 6, i8 6, i8 2, i8 2, i8 2, i8 2, i8 6, i8 6, i8 4, i8 4, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 1, i8 1, i8 3, i8 3, i8 2, i8 2, i8 4, i8 4, i8 7, i8 7, i8 5, i8 5, i8 0, i8 0, i8 7, i8 7, i8 1, i8 1, i8 0, i8 0, i8 7, i8 7, i8 4, i8 4, i8 0, i8 0, i8 1, i8 1, i8 3, i8 3, i8 1, i8 1, i8 4, i8 4, i8 4, i8 4, i8 3, i8 3, i8 6, i8 6, i8 6, i8 6, i8 5, i8 5, i8 6, i8 6>
  ret <64 x i8> %shift
}

define <64 x i8> @constant_shift_v64i8_quads(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8_quads:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,4,4,4,4,32,32,32,32,1,1,1,1,1,1,1,1,4,4,4,4,1,1,1,1,4,4,4,4,8,8,8,8,16,16,16,16,16,16,16,16,2,2,2,2,64,64,64,64,4,4,4,4,32,32,32,32,128,128,128,128]
; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
; AVX512DQ-NEXT:    vpsubb %ymm1, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsubb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8_quads:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,4,4,4,4,32,32,32,32,1,1,1,1,1,1,1,1,4,4,4,4,1,1,1,1,4,4,4,4,8,8,8,8,16,16,16,16,16,16,16,16,2,2,2,2,64,64,64,64,4,4,4,4,32,32,32,32,128,128,128,128]
; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 5, i8 5, i8 5, i8 5, i8 2, i8 2, i8 2, i8 2, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 5, i8 5, i8 5, i8 5, i8 4, i8 4, i8 4, i8 4, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 6, i8 6, i8 6, i8 6, i8 1, i8 1, i8 1, i8 1, i8 5, i8 5, i8 5, i8 5, i8 2, i8 2, i8 2, i8 2, i8 0, i8 0, i8 0, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsraq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrad $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsraw $3, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512DQ-NEXT:    vpxor %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpxor %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}

define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
; AVX512DQ-LABEL: ashr_const7_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: ashr_const7_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
  %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <64 x i8> %res
}

define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
; ALL-LABEL: PR52719:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd %edi, %xmm1
; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %vec = insertelement <8 x i32> poison, i32 %a1, i64 0
  %splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
  %zext = zext <8 x i32> %splat to <8 x i64>
  %ashr = ashr <8 x i64> %a0, %zext
  ret <8 x i64> %ashr
}
