xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4
5;
6; ASHR - Immediate
7;
8
; Shift count 0: CHECK expects the llvm.x86.sse2.psrai.w call to fold away, returning %v unchanged.
9define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
10; CHECK-LABEL: @sse2_psrai_w_0(
11; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
12;
13  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
14  ret <8 x i16> %1
15}
16
; Shift count 15 is below the 16-bit element width: CHECK expects a plain ashr by splat (i16 15).
17define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
18; CHECK-LABEL: @sse2_psrai_w_15(
19; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
20; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
21;
22  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
23  ret <8 x i16> %1
24}
25
; Shift count 64 is out of range for 16-bit elements: CHECK expects an ashr by 15 (element width - 1).
26define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
27; CHECK-LABEL: @sse2_psrai_w_64(
28; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
29; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
30;
31  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
32  ret <8 x i16> %1
33}
34
; Shift count 0: CHECK expects the llvm.x86.sse2.psrai.d call to fold away, returning %v unchanged.
35define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
36; CHECK-LABEL: @sse2_psrai_d_0(
37; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
38;
39  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
40  ret <4 x i32> %1
41}
42
; Shift count 15 is below the 32-bit element width: CHECK expects a plain ashr by splat (i32 15).
43define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
44; CHECK-LABEL: @sse2_psrai_d_15(
45; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
46; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
47;
48  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
49  ret <4 x i32> %1
50}
51
; Shift count 64 is out of range for 32-bit elements: CHECK expects an ashr by 31 (element width - 1).
52define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
53; CHECK-LABEL: @sse2_psrai_d_64(
54; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
55; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
56;
57  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
58  ret <4 x i32> %1
59}
60
; Shift count 0: CHECK expects the llvm.x86.avx2.psrai.w call to fold away, returning %v unchanged.
61define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
62; CHECK-LABEL: @avx2_psrai_w_0(
63; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
64;
65  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
66  ret <16 x i16> %1
67}
68
; Shift count 15 is below the 16-bit element width: CHECK expects a plain ashr by splat (i16 15).
69define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
70; CHECK-LABEL: @avx2_psrai_w_15(
71; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
72; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
73;
74  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
75  ret <16 x i16> %1
76}
77
; Shift count 64 is out of range for 16-bit elements: CHECK expects an ashr by 15 (element width - 1).
78define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
79; CHECK-LABEL: @avx2_psrai_w_64(
80; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
81; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
82;
83  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
84  ret <16 x i16> %1
85}
86
; Shift count 0: CHECK expects the llvm.x86.avx2.psrai.d call to fold away, returning %v unchanged.
87define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
88; CHECK-LABEL: @avx2_psrai_d_0(
89; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
90;
91  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
92  ret <8 x i32> %1
93}
94
; Shift count 15 is below the 32-bit element width: CHECK expects a plain ashr by splat (i32 15).
95define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
96; CHECK-LABEL: @avx2_psrai_d_15(
97; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
98; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
99;
100  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
101  ret <8 x i32> %1
102}
103
; Shift count 64 is out of range for 32-bit elements: CHECK expects an ashr by 31 (element width - 1).
104define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
105; CHECK-LABEL: @avx2_psrai_d_64(
106; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
107; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
108;
109  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
110  ret <8 x i32> %1
111}
112
; Shift count 0: CHECK expects the llvm.x86.avx512.psrai.q.128 call to fold away, returning %v unchanged.
113define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
114; CHECK-LABEL: @avx512_psrai_q_128_0(
115; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
116;
117  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
118  ret <2 x i64> %1
119}
120
; Shift count 15 is below the 64-bit element width: CHECK expects a plain ashr by splat (i64 15).
121define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
122; CHECK-LABEL: @avx512_psrai_q_128_15(
123; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
124; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
125;
126  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
127  ret <2 x i64> %1
128}
129
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects an ashr by 63 (element width - 1).
130define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
131; CHECK-LABEL: @avx512_psrai_q_128_64(
132; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
133; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
134;
135  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
136  ret <2 x i64> %1
137}
138
; Shift count 0: CHECK expects the llvm.x86.avx512.psrai.q.256 call to fold away, returning %v unchanged.
139define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
140; CHECK-LABEL: @avx512_psrai_q_256_0(
141; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
142;
143  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
144  ret <4 x i64> %1
145}
146
; Shift count 15 is below the 64-bit element width: CHECK expects a plain ashr by splat (i64 15).
147define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
148; CHECK-LABEL: @avx512_psrai_q_256_15(
149; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
150; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
151;
152  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
153  ret <4 x i64> %1
154}
155
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects an ashr by 63 (element width - 1).
156define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
157; CHECK-LABEL: @avx512_psrai_q_256_64(
158; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
159; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
160;
161  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
162  ret <4 x i64> %1
163}
164
; Shift count 0: CHECK expects the llvm.x86.avx512.psrai.w.512 call to fold away, returning %v unchanged.
165define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
166; CHECK-LABEL: @avx512_psrai_w_512_0(
167; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
168;
169  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
170  ret <32 x i16> %1
171}
172
; Shift count 15 is below the 16-bit element width: CHECK expects a plain ashr by splat (i16 15).
173define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
174; CHECK-LABEL: @avx512_psrai_w_512_15(
175; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
176; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
177;
178  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
179  ret <32 x i16> %1
180}
181
; Shift count 64 is out of range for 16-bit elements: CHECK expects an ashr by 15 (element width - 1).
182define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
183; CHECK-LABEL: @avx512_psrai_w_512_64(
184; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
185; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
186;
187  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
188  ret <32 x i16> %1
189}
190
; Shift count 0: CHECK expects the llvm.x86.avx512.psrai.d.512 call to fold away, returning %v unchanged.
191define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
192; CHECK-LABEL: @avx512_psrai_d_512_0(
193; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
194;
195  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
196  ret <16 x i32> %1
197}
198
; Shift count 15 is below the 32-bit element width: CHECK expects a plain ashr by splat (i32 15).
199define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
200; CHECK-LABEL: @avx512_psrai_d_512_15(
201; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
202; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
203;
204  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
205  ret <16 x i32> %1
206}
207
; Shift count 64 is out of range for 32-bit elements: CHECK expects an ashr by 31 (element width - 1).
208define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
209; CHECK-LABEL: @avx512_psrai_d_512_64(
210; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
211; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
212;
213  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
214  ret <16 x i32> %1
215}
216
; Shift count 0: CHECK expects the llvm.x86.avx512.psrai.q.512 call to fold away, returning %v unchanged.
217define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
218; CHECK-LABEL: @avx512_psrai_q_512_0(
219; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
220;
221  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
222  ret <8 x i64> %1
223}
224
; Shift count 15 is below the 64-bit element width: CHECK expects a plain ashr by splat (i64 15).
225define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
226; CHECK-LABEL: @avx512_psrai_q_512_15(
227; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
228; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
229;
230  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
231  ret <8 x i64> %1
232}
233
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects an ashr by 63 (element width - 1).
234define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
235; CHECK-LABEL: @avx512_psrai_q_512_64(
236; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
237; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
238;
239  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
240  ret <8 x i64> %1
241}
242
243;
244; LSHR - Immediate
245;
246
; Shift count 0: CHECK expects the llvm.x86.sse2.psrli.w call to fold away, returning %v unchanged.
247define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
248; CHECK-LABEL: @sse2_psrli_w_0(
249; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
250;
251  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
252  ret <8 x i16> %1
253}
254
; Shift count 15 is below the 16-bit element width: CHECK expects a plain lshr by splat (i16 15).
255define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
256; CHECK-LABEL: @sse2_psrli_w_15(
257; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
258; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
259;
260  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
261  ret <8 x i16> %1
262}
263
; Shift count 64 is out of range for 16-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
264define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
265; CHECK-LABEL: @sse2_psrli_w_64(
266; CHECK-NEXT:    ret <8 x i16> zeroinitializer
267;
268  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
269  ret <8 x i16> %1
270}
271
; Shift count 0: CHECK expects the llvm.x86.sse2.psrli.d call to fold away, returning %v unchanged.
272define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
273; CHECK-LABEL: @sse2_psrli_d_0(
274; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
275;
276  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
277  ret <4 x i32> %1
278}
279
; Shift count 15 is below the 32-bit element width: CHECK expects a plain lshr by splat (i32 15).
280define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
281; CHECK-LABEL: @sse2_psrli_d_15(
282; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
283; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
284;
285  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
286  ret <4 x i32> %1
287}
288
; Shift count 64 is out of range for 32-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
289define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
290; CHECK-LABEL: @sse2_psrli_d_64(
291; CHECK-NEXT:    ret <4 x i32> zeroinitializer
292;
293  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
294  ret <4 x i32> %1
295}
296
; Shift count 0: CHECK expects the llvm.x86.sse2.psrli.q call to fold away, returning %v unchanged.
297define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
298; CHECK-LABEL: @sse2_psrli_q_0(
299; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
300;
301  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
302  ret <2 x i64> %1
303}
304
; Shift count 15 is below the 64-bit element width: CHECK expects a plain lshr by splat (i64 15).
305define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
306; CHECK-LABEL: @sse2_psrli_q_15(
307; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
308; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
309;
310  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
311  ret <2 x i64> %1
312}
313
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the logical shift to fold to zeroinitializer.
314define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
315; CHECK-LABEL: @sse2_psrli_q_64(
316; CHECK-NEXT:    ret <2 x i64> zeroinitializer
317;
318  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
319  ret <2 x i64> %1
320}
321
; Shift count 0: CHECK expects the llvm.x86.avx2.psrli.w call to fold away, returning %v unchanged.
322define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
323; CHECK-LABEL: @avx2_psrli_w_0(
324; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
325;
326  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
327  ret <16 x i16> %1
328}
329
; Shift count 15 is below the 16-bit element width: CHECK expects a plain lshr by splat (i16 15).
330define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
331; CHECK-LABEL: @avx2_psrli_w_15(
332; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
333; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
334;
335  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
336  ret <16 x i16> %1
337}
338
; Shift count 64 is out of range for 16-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
339define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
340; CHECK-LABEL: @avx2_psrli_w_64(
341; CHECK-NEXT:    ret <16 x i16> zeroinitializer
342;
343  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
344  ret <16 x i16> %1
345}
346
; Shift count 0: CHECK expects the llvm.x86.avx2.psrli.d call to fold away, returning %v unchanged.
347define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
348; CHECK-LABEL: @avx2_psrli_d_0(
349; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
350;
351  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
352  ret <8 x i32> %1
353}
354
; Shift count 15 is below the 32-bit element width: CHECK expects a plain lshr by splat (i32 15).
355define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
356; CHECK-LABEL: @avx2_psrli_d_15(
357; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
358; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
359;
360  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
361  ret <8 x i32> %1
362}
363
; Shift count 64 is out of range for 32-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
364define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
365; CHECK-LABEL: @avx2_psrli_d_64(
366; CHECK-NEXT:    ret <8 x i32> zeroinitializer
367;
368  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
369  ret <8 x i32> %1
370}
371
; Shift count 0: CHECK expects the llvm.x86.avx2.psrli.q call to fold away, returning %v unchanged.
372define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
373; CHECK-LABEL: @avx2_psrli_q_0(
374; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
375;
376  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
377  ret <4 x i64> %1
378}
379
; Shift count 15 is below the 64-bit element width: CHECK expects a plain lshr by splat (i64 15).
380define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
381; CHECK-LABEL: @avx2_psrli_q_15(
382; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
383; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
384;
385  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
386  ret <4 x i64> %1
387}
388
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the logical shift to fold to zeroinitializer.
389define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
390; CHECK-LABEL: @avx2_psrli_q_64(
391; CHECK-NEXT:    ret <4 x i64> zeroinitializer
392;
393  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
394  ret <4 x i64> %1
395}
396
; Shift count 0: CHECK expects the llvm.x86.avx512.psrli.w.512 call to fold away, returning %v unchanged.
397define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
398; CHECK-LABEL: @avx512_psrli_w_512_0(
399; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
400;
401  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
402  ret <32 x i16> %1
403}
404
; Shift count 15 is below the 16-bit element width: CHECK expects a plain lshr by splat (i16 15).
405define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
406; CHECK-LABEL: @avx512_psrli_w_512_15(
407; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
408; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
409;
410  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
411  ret <32 x i16> %1
412}
413
; Shift count 64 is out of range for 16-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
414define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
415; CHECK-LABEL: @avx512_psrli_w_512_64(
416; CHECK-NEXT:    ret <32 x i16> zeroinitializer
417;
418  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
419  ret <32 x i16> %1
420}
421
; Shift count 0: CHECK expects the llvm.x86.avx512.psrli.d.512 call to fold away, returning %v unchanged.
422define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
423; CHECK-LABEL: @avx512_psrli_d_512_0(
424; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
425;
426  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
427  ret <16 x i32> %1
428}
429
; Shift count 15 is below the 32-bit element width: CHECK expects a plain lshr by splat (i32 15).
430define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
431; CHECK-LABEL: @avx512_psrli_d_512_15(
432; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
433; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
434;
435  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
436  ret <16 x i32> %1
437}
438
; Shift count 64 is out of range for 32-bit elements: CHECK expects the logical shift to fold to zeroinitializer.
439define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
440; CHECK-LABEL: @avx512_psrli_d_512_64(
441; CHECK-NEXT:    ret <16 x i32> zeroinitializer
442;
443  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
444  ret <16 x i32> %1
445}
446
; Shift count 0: CHECK expects the llvm.x86.avx512.psrli.q.512 call to fold away, returning %v unchanged.
447define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
448; CHECK-LABEL: @avx512_psrli_q_512_0(
449; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
450;
451  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
452  ret <8 x i64> %1
453}
454
; Shift count 15 is below the 64-bit element width: CHECK expects a plain lshr by splat (i64 15).
455define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
456; CHECK-LABEL: @avx512_psrli_q_512_15(
457; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
458; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
459;
460  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
461  ret <8 x i64> %1
462}
463
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the logical shift to fold to zeroinitializer.
464define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
465; CHECK-LABEL: @avx512_psrli_q_512_64(
466; CHECK-NEXT:    ret <8 x i64> zeroinitializer
467;
468  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
469  ret <8 x i64> %1
470}
471
472;
473; SHL - Immediate
474;
475
; Shift count 0: CHECK expects the llvm.x86.sse2.pslli.w call to fold away, returning %v unchanged.
476define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
477; CHECK-LABEL: @sse2_pslli_w_0(
478; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
479;
480  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
481  ret <8 x i16> %1
482}
483
; Shift count 15 is below the 16-bit element width: CHECK expects a plain shl by splat (i16 15).
484define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
485; CHECK-LABEL: @sse2_pslli_w_15(
486; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
487; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
488;
489  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
490  ret <8 x i16> %1
491}
492
; Shift count 64 is out of range for 16-bit elements: CHECK expects the left shift to fold to zeroinitializer.
493define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
494; CHECK-LABEL: @sse2_pslli_w_64(
495; CHECK-NEXT:    ret <8 x i16> zeroinitializer
496;
497  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
498  ret <8 x i16> %1
499}
500
; Shift count 0: CHECK expects the llvm.x86.sse2.pslli.d call to fold away, returning %v unchanged.
501define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
502; CHECK-LABEL: @sse2_pslli_d_0(
503; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
504;
505  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
506  ret <4 x i32> %1
507}
508
; Shift count 15 is below the 32-bit element width: CHECK expects a plain shl by splat (i32 15).
509define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
510; CHECK-LABEL: @sse2_pslli_d_15(
511; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
512; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
513;
514  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
515  ret <4 x i32> %1
516}
517
; Shift count 64 is out of range for 32-bit elements: CHECK expects the left shift to fold to zeroinitializer.
518define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
519; CHECK-LABEL: @sse2_pslli_d_64(
520; CHECK-NEXT:    ret <4 x i32> zeroinitializer
521;
522  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
523  ret <4 x i32> %1
524}
525
; Shift count 0: CHECK expects the llvm.x86.sse2.pslli.q call to fold away, returning %v unchanged.
526define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
527; CHECK-LABEL: @sse2_pslli_q_0(
528; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
529;
530  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
531  ret <2 x i64> %1
532}
533
; Shift count 15 is below the 64-bit element width: CHECK expects a plain shl by splat (i64 15).
534define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
535; CHECK-LABEL: @sse2_pslli_q_15(
536; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
537; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
538;
539  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
540  ret <2 x i64> %1
541}
542
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the left shift to fold to zeroinitializer.
543define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
544; CHECK-LABEL: @sse2_pslli_q_64(
545; CHECK-NEXT:    ret <2 x i64> zeroinitializer
546;
547  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
548  ret <2 x i64> %1
549}
550
; Shift count 0: CHECK expects the llvm.x86.avx2.pslli.w call to fold away, returning %v unchanged.
551define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
552; CHECK-LABEL: @avx2_pslli_w_0(
553; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
554;
555  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
556  ret <16 x i16> %1
557}
558
; Shift count 15 is below the 16-bit element width: CHECK expects a plain shl by splat (i16 15).
559define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
560; CHECK-LABEL: @avx2_pslli_w_15(
561; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
562; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
563;
564  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
565  ret <16 x i16> %1
566}
567
; Shift count 64 is out of range for 16-bit elements: CHECK expects the left shift to fold to zeroinitializer.
568define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
569; CHECK-LABEL: @avx2_pslli_w_64(
570; CHECK-NEXT:    ret <16 x i16> zeroinitializer
571;
572  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
573  ret <16 x i16> %1
574}
575
; Shift count 0: CHECK expects the llvm.x86.avx2.pslli.d call to fold away, returning %v unchanged.
576define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
577; CHECK-LABEL: @avx2_pslli_d_0(
578; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
579;
580  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
581  ret <8 x i32> %1
582}
583
; Shift count 15 is below the 32-bit element width: CHECK expects a plain shl by splat (i32 15).
584define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
585; CHECK-LABEL: @avx2_pslli_d_15(
586; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
587; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
588;
589  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
590  ret <8 x i32> %1
591}
592
; Shift count 64 is out of range for 32-bit elements: CHECK expects the left shift to fold to zeroinitializer.
593define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
594; CHECK-LABEL: @avx2_pslli_d_64(
595; CHECK-NEXT:    ret <8 x i32> zeroinitializer
596;
597  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
598  ret <8 x i32> %1
599}
600
; Shift count 0: CHECK expects the llvm.x86.avx2.pslli.q call to fold away, returning %v unchanged.
601define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
602; CHECK-LABEL: @avx2_pslli_q_0(
603; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
604;
605  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
606  ret <4 x i64> %1
607}
608
; Shift count 15 is below the 64-bit element width: CHECK expects a plain shl by splat (i64 15).
609define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
610; CHECK-LABEL: @avx2_pslli_q_15(
611; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
612; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
613;
614  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
615  ret <4 x i64> %1
616}
617
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the left shift to fold to zeroinitializer.
618define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
619; CHECK-LABEL: @avx2_pslli_q_64(
620; CHECK-NEXT:    ret <4 x i64> zeroinitializer
621;
622  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
623  ret <4 x i64> %1
624}
625
; Shift count 0: CHECK expects the llvm.x86.avx512.pslli.w.512 call to fold away, returning %v unchanged.
626define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
627; CHECK-LABEL: @avx512_pslli_w_512_0(
628; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
629;
630  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
631  ret <32 x i16> %1
632}
633
; Shift count 15 is below the 16-bit element width: CHECK expects a plain shl by splat (i16 15).
634define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
635; CHECK-LABEL: @avx512_pslli_w_512_15(
636; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
637; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
638;
639  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
640  ret <32 x i16> %1
641}
642
; Shift count 64 is out of range for 16-bit elements: CHECK expects the left shift to fold to zeroinitializer.
643define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
644; CHECK-LABEL: @avx512_pslli_w_512_64(
645; CHECK-NEXT:    ret <32 x i16> zeroinitializer
646;
647  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
648  ret <32 x i16> %1
649}
650
; Shift count 0: CHECK expects the llvm.x86.avx512.pslli.d.512 call to fold away, returning %v unchanged.
651define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
652; CHECK-LABEL: @avx512_pslli_d_512_0(
653; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
654;
655  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
656  ret <16 x i32> %1
657}
658
; Shift count 15 is below the 32-bit element width: CHECK expects a plain shl by splat (i32 15).
659define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
660; CHECK-LABEL: @avx512_pslli_d_512_15(
661; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
662; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
663;
664  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
665  ret <16 x i32> %1
666}
667
; Shift count 64 is out of range for 32-bit elements: CHECK expects the left shift to fold to zeroinitializer.
668define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
669; CHECK-LABEL: @avx512_pslli_d_512_64(
670; CHECK-NEXT:    ret <16 x i32> zeroinitializer
671;
672  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
673  ret <16 x i32> %1
674}
675
; Shift count 0: CHECK expects the llvm.x86.avx512.pslli.q.512 call to fold away, returning %v unchanged.
676define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
677; CHECK-LABEL: @avx512_pslli_q_512_0(
678; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
679;
680  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
681  ret <8 x i64> %1
682}
683
; Shift count 15 is below the 64-bit element width: CHECK expects a plain shl by splat (i64 15).
684define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
685; CHECK-LABEL: @avx512_pslli_q_512_15(
686; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
687; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
688;
689  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
690  ret <8 x i64> %1
691}
692
; Shift count 64 equals the 64-bit element width and is therefore out of range: CHECK expects the left shift to fold to zeroinitializer.
693define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
694; CHECK-LABEL: @avx512_pslli_q_512_64(
695; CHECK-NEXT:    ret <8 x i64> zeroinitializer
696;
697  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
698  ret <8 x i64> %1
699}
700
701;
702; ASHR - Constant Vector
703;
704
; All-zero count vector: CHECK expects the llvm.x86.sse2.psra.w call to fold away, returning %v unchanged.
705define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
706; CHECK-LABEL: @sse2_psra_w_0(
707; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
708;
709  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
710  ret <8 x i16> %1
711}
712
; Count vector has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7.
; CHECK expects an ashr by splat (i16 15), so the 9999 elements do not affect the expected count.
; NOTE(review): presumably only the low 64 bits of the count operand are read - confirm against the PSRAW definition.
713define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
714; CHECK-LABEL: @sse2_psra_w_15(
715; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
716; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
717;
718  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
719  ret <8 x i16> %1
720}
721
; Count vector has 15 in every element; CHECK expects an ashr by splat (i16 15).
; NOTE(review): if the low 64 bits are read as a single count, that value is out of range and 15 would be the
; clamped amount (element width - 1) rather than the literal element value - confirm against the PSRAW definition.
722define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
723; CHECK-LABEL: @sse2_psra_w_15_splat(
724; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
725; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
726;
727  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
728  ret <8 x i16> %1
729}
730
; Count vector has 64 in element 0, zeros in elements 1-3 and 9999 in elements 4-7.
; 64 is out of range for 16-bit elements: CHECK expects an ashr by 15 (element width - 1).
731define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
732; CHECK-LABEL: @sse2_psra_w_64(
733; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
734; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
735;
736  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
737  ret <8 x i16> %1
738}
739
; All-zero count vector: CHECK expects the llvm.x86.sse2.psra.d call to fold away, returning %v unchanged.
740define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
741; CHECK-LABEL: @sse2_psra_d_0(
742; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
743;
744  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
745  ret <4 x i32> %1
746}
747
; Count vector has 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; CHECK expects an ashr by splat (i32 15), so the 9999 elements do not affect the expected count.
; NOTE(review): presumably only the low 64 bits of the count operand are read - confirm against the PSRAD definition.
748define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
749; CHECK-LABEL: @sse2_psra_d_15(
750; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
751; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
752;
753  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
754  ret <4 x i32> %1
755}
756
; Count vector has 15 in every element, yet CHECK expects an ashr by 31, not 15.
; NOTE(review): presumably the low 64 bits (elements 0 and 1 together) are read as a single count, which is
; out of range and clamps to element width - 1 - confirm against the PSRAD definition.
757define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
758; CHECK-LABEL: @sse2_psra_d_15_splat(
759; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
760; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
761;
762  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
763  ret <4 x i32> %1
764}
765
; Count vector has 64 in element 0, 0 in element 1 and 9999 in elements 2-3.
; 64 is out of range for 32-bit elements: CHECK expects an ashr by 31 (element width - 1).
766define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
767; CHECK-LABEL: @sse2_psra_d_64(
768; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
769; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
770;
771  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
772  ret <4 x i32> %1
773}
774
; All-zero <8 x i16> count vector: CHECK expects the llvm.x86.avx2.psra.w call to fold away, returning %v unchanged.
775define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
776; CHECK-LABEL: @avx2_psra_w_0(
777; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
778;
779  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
780  ret <16 x i16> %1
781}
782
; Count vector has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7.
; CHECK expects an ashr by splat (i16 15), so the 9999 elements do not affect the expected count.
; NOTE(review): presumably only the low 64 bits of the count operand are read - confirm against the VPSRAW definition.
783define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
784; CHECK-LABEL: @avx2_psra_w_15(
785; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
786; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
787;
788  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
789  ret <16 x i16> %1
790}
791
; Count vector has 15 in every element; CHECK expects an ashr by splat (i16 15).
; NOTE(review): if the low 64 bits are read as a single count, that value is out of range and 15 would be the
; clamped amount (element width - 1) rather than the literal element value - confirm against the VPSRAW definition.
792define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
793; CHECK-LABEL: @avx2_psra_w_15_splat(
794; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
795; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
796;
797  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
798  ret <16 x i16> %1
799}
800
; Count vector has 64 in element 0, zeros in elements 1-3 and 9999 in elements 4-7.
; 64 is out of range for 16-bit elements: CHECK expects an ashr by 15 (element width - 1).
801define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
802; CHECK-LABEL: @avx2_psra_w_64(
803; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
804; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
805;
806  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
807  ret <16 x i16> %1
808}
809
; All-zero <4 x i32> count vector: CHECK expects the llvm.x86.avx2.psra.d call to fold away, returning %v unchanged.
810define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
811; CHECK-LABEL: @avx2_psra_d_0(
812; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
813;
814  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
815  ret <8 x i32> %1
816}
817
; Count vector has 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; CHECK expects an ashr by splat (i32 15), so the 9999 elements do not affect the expected count.
; NOTE(review): presumably only the low 64 bits of the count operand are read - confirm against the VPSRAD definition.
818define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
819; CHECK-LABEL: @avx2_psra_d_15(
820; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
821; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
822;
823  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
824  ret <8 x i32> %1
825}
826
; Count vector has 15 in every element, yet CHECK expects an ashr by 31, not 15.
; NOTE(review): presumably the low 64 bits (elements 0 and 1 together) are read as a single count, which is
; out of range and clamps to element width - 1 - confirm against the VPSRAD definition.
827define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
828; CHECK-LABEL: @avx2_psra_d_15_splat(
829; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
830; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
831;
832  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
833  ret <8 x i32> %1
834}
835
; Count vector has 64 in element 0, 0 in element 1 and 9999 in elements 2-3.
; 64 is out of range for 32-bit elements: CHECK expects an ashr by 31 (element width - 1).
836define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
837; CHECK-LABEL: @avx2_psra_d_64(
838; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
839; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
840;
841  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
842  ret <8 x i32> %1
843}
844
; All-zero count vector: CHECK expects the llvm.x86.avx512.psra.q.128 call to fold away, returning %v unchanged.
845define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
846; CHECK-LABEL: @avx512_psra_q_128_0(
847; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
848;
849  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
850  ret <2 x i64> %1
851}
852
; Count vector has 15 in element 0 and 9999 in element 1.
; CHECK expects an ashr by splat (i64 15), so element 1 does not affect the expected count.
853define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
854; CHECK-LABEL: @avx512_psra_q_128_15(
855; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
856; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
857;
858  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
859  ret <2 x i64> %1
860}
861
; Count vector has 64 in element 0 and 9999 in element 1.
; 64 equals the 64-bit element width and is out of range: CHECK expects an ashr by 63 (element width - 1).
862define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
863; CHECK-LABEL: @avx512_psra_q_128_64(
864; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
865; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
866;
867  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
868  ret <2 x i64> %1
869}
870
; All-zero <2 x i64> count vector: CHECK expects the llvm.x86.avx512.psra.q.256 call to fold away, returning %v unchanged.
871define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
872; CHECK-LABEL: @avx512_psra_q_256_0(
873; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
874;
875  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
876  ret <4 x i64> %1
877}
878
; Count vector has 15 in element 0 and 9999 in element 1.
; CHECK expects an ashr by splat (i64 15), so element 1 does not affect the expected count.
879define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
880; CHECK-LABEL: @avx512_psra_q_256_15(
881; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
882; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
883;
884  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
885  ret <4 x i64> %1
886}
887
; Count vector has 64 in element 0 and 9999 in element 1.
; 64 equals the 64-bit element width and is out of range: CHECK expects an ashr by 63 (element width - 1).
888define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
889; CHECK-LABEL: @avx512_psra_q_256_64(
890; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
891; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
892;
893  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
894  ret <4 x i64> %1
895}
896
; All-zero <8 x i16> count vector: CHECK expects the llvm.x86.avx512.psra.w.512 call to fold away, returning %v unchanged.
897define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
898; CHECK-LABEL: @avx512_psra_w_512_0(
899; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
900;
901  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
902  ret <32 x i16> %1
903}
904
; Count vector has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7.
; CHECK expects an ashr by splat (i16 15), so the 9999 elements do not affect the expected count.
; NOTE(review): presumably only the low 64 bits of the count operand are read - confirm against the VPSRAW definition.
905define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
906; CHECK-LABEL: @avx512_psra_w_512_15(
907; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
908; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
909;
910  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
911  ret <32 x i16> %1
912}
913
; Count vector has 15 in every element; CHECK expects an ashr by splat (i16 15).
; NOTE(review): if the low 64 bits are read as a single count, that value is out of range and 15 would be the
; clamped amount (element width - 1) rather than the literal element value - confirm against the VPSRAW definition.
914define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
915; CHECK-LABEL: @avx512_psra_w_512_15_splat(
916; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
917; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
918;
919  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
920  ret <32 x i16> %1
921}
922
; Count element 0 is 64 (larger than the 16-bit element width): expected ashr amount is 15.
923define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
924; CHECK-LABEL: @avx512_psra_w_512_64(
925; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
926; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
927;
928  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
929  ret <32 x i16> %1
930}
931
; Zero count vector: the call is expected to fold away, returning %v unchanged.
932define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
933; CHECK-LABEL: @avx512_psra_d_512_0(
934; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
935;
936  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
937  ret <16 x i32> %1
938}
939
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, ashr by splat 15.
940define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
941; CHECK-LABEL: @avx512_psra_d_512_15(
942; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
943; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
944;
945  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
946  ret <16 x i32> %1
947}
948
; All four count elements are 15, yet the expected ashr amount is 31, not 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
949define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
950; CHECK-LABEL: @avx512_psra_d_512_15_splat(
951; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
952; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
953;
954  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
955  ret <16 x i32> %1
956}
957
; Count element 0 is 64 (larger than the 32-bit element width): expected ashr amount is 31.
958define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
959; CHECK-LABEL: @avx512_psra_d_512_64(
960; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
961; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
962;
963  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
964  ret <16 x i32> %1
965}
966
; Zero count vector: the call is expected to fold away, returning %v unchanged.
967define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
968; CHECK-LABEL: @avx512_psra_q_512_0(
969; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
970;
971  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
972  ret <8 x i64> %1
973}
974
; Count <15, 9999>: only element 0 matters; expected to fold to a plain ashr by splat 15.
975define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
976; CHECK-LABEL: @avx512_psra_q_512_15(
977; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
978; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
979;
980  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
981  ret <8 x i64> %1
982}
983
; Count element 0 is 64 (equal to the element width): expected ashr amount is 63.
984define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
985; CHECK-LABEL: @avx512_psra_q_512_64(
986; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
987; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
988;
989  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
990  ret <8 x i64> %1
991}
992
993;
994; LSHR - Constant Vector
995;
996
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
997define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
998; CHECK-LABEL: @sse2_psrl_w_0(
999; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
1000;
1001  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
1002  ret <8 x i16> %1
1003}
1004
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, lshr by splat 15.
1005define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
1006; CHECK-LABEL: @sse2_psrl_w_15(
1007; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
1008; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1009;
1010  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1011  ret <8 x i16> %1
1012}
1013
; All eight count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
1014define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
1015; CHECK-LABEL: @sse2_psrl_w_15_splat(
1016; CHECK-NEXT:    ret <8 x i16> zeroinitializer
1017;
1018  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1019  ret <8 x i16> %1
1020}
1021
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1022define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
1023; CHECK-LABEL: @sse2_psrl_w_64(
1024; CHECK-NEXT:    ret <8 x i16> zeroinitializer
1025;
1026  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1027  ret <8 x i16> %1
1028}
1029
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1030define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
1031; CHECK-LABEL: @sse2_psrl_d_0(
1032; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
1033;
1034  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1035  ret <4 x i32> %1
1036}
1037
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, lshr by splat 15.
1038define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
1039; CHECK-LABEL: @sse2_psrl_d_15(
1040; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
1041; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1042;
1043  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1044  ret <4 x i32> %1
1045}
1046
; All four count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1047define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
1048; CHECK-LABEL: @sse2_psrl_d_15_splat(
1049; CHECK-NEXT:    ret <4 x i32> zeroinitializer
1050;
1051  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1052  ret <4 x i32> %1
1053}
1054
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1055define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
1056; CHECK-LABEL: @sse2_psrl_d_64(
1057; CHECK-NEXT:    ret <4 x i32> zeroinitializer
1058;
1059  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1060  ret <4 x i32> %1
1061}
1062
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1063define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
1064; CHECK-LABEL: @sse2_psrl_q_0(
1065; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
1066;
1067  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1068  ret <2 x i64> %1
1069}
1070
; Count <15, 9999>: only element 0 matters; expected to fold to a plain lshr by splat 15.
1071define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
1072; CHECK-LABEL: @sse2_psrl_q_15(
1073; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
1074; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1075;
1076  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1077  ret <2 x i64> %1
1078}
1079
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1080define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
1081; CHECK-LABEL: @sse2_psrl_q_64(
1082; CHECK-NEXT:    ret <2 x i64> zeroinitializer
1083;
1084  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1085  ret <2 x i64> %1
1086}
1087
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1088define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
1089; CHECK-LABEL: @avx2_psrl_w_0(
1090; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
1091;
1092  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
1093  ret <16 x i16> %1
1094}
1095
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, lshr by splat 15.
1096define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
1097; CHECK-LABEL: @avx2_psrl_w_15(
1098; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
1099; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
1100;
1101  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1102  ret <16 x i16> %1
1103}
1104
; All eight count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
1105define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
1106; CHECK-LABEL: @avx2_psrl_w_15_splat(
1107; CHECK-NEXT:    ret <16 x i16> zeroinitializer
1108;
1109  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1110  ret <16 x i16> %1
1111}
1112
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1113define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
1114; CHECK-LABEL: @avx2_psrl_w_64(
1115; CHECK-NEXT:    ret <16 x i16> zeroinitializer
1116;
1117  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1118  ret <16 x i16> %1
1119}
1120
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1121define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
1122; CHECK-LABEL: @avx2_psrl_d_0(
1123; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
1124;
1125  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
1126  ret <8 x i32> %1
1127}
1128
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, lshr by splat 15.
1129define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
1130; CHECK-LABEL: @avx2_psrl_d_15(
1131; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
1132; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1133;
1134  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1135  ret <8 x i32> %1
1136}
1137
; All four count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1138define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
1139; CHECK-LABEL: @avx2_psrl_d_15_splat(
1140; CHECK-NEXT:    ret <8 x i32> zeroinitializer
1141;
1142  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1143  ret <8 x i32> %1
1144}
1145
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1146define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
1147; CHECK-LABEL: @avx2_psrl_d_64(
1148; CHECK-NEXT:    ret <8 x i32> zeroinitializer
1149;
1150  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1151  ret <8 x i32> %1
1152}
1153
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1154define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
1155; CHECK-LABEL: @avx2_psrl_q_0(
1156; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
1157;
1158  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
1159  ret <4 x i64> %1
1160}
1161
; Count <15, 9999>: only element 0 matters; expected to fold to a plain lshr by splat 15.
1162define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
1163; CHECK-LABEL: @avx2_psrl_q_15(
1164; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
1165; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1166;
1167  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1168  ret <4 x i64> %1
1169}
1170
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1171define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
1172; CHECK-LABEL: @avx2_psrl_q_64(
1173; CHECK-NEXT:    ret <4 x i64> zeroinitializer
1174;
1175  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1176  ret <4 x i64> %1
1177}
1178
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1179define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
1180; CHECK-LABEL: @avx512_psrl_w_512_0(
1181; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
1182;
1183  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
1184  ret <32 x i16> %1
1185}
1186
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, lshr by splat 15.
1187define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
1188; CHECK-LABEL: @avx512_psrl_w_512_15(
1189; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
1190; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
1191;
1192  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1193  ret <32 x i16> %1
1194}
1195
; All eight count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
1196define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
1197; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
1198; CHECK-NEXT:    ret <32 x i16> zeroinitializer
1199;
1200  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1201  ret <32 x i16> %1
1202}
1203
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1204define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
1205; CHECK-LABEL: @avx512_psrl_w_512_64(
1206; CHECK-NEXT:    ret <32 x i16> zeroinitializer
1207;
1208  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1209  ret <32 x i16> %1
1210}
1211
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1212define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
1213; CHECK-LABEL: @avx512_psrl_d_512_0(
1214; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
1215;
1216  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
1217  ret <16 x i32> %1
1218}
1219
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, lshr by splat 15.
1220define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
1221; CHECK-LABEL: @avx512_psrl_d_512_15(
1222; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
1223; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
1224;
1225  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1226  ret <16 x i32> %1
1227}
1228
; All four count elements are 15, yet the expected result is all zeros rather than lshr by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1229define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
1230; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
1231; CHECK-NEXT:    ret <16 x i32> zeroinitializer
1232;
1233  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1234  ret <16 x i32> %1
1235}
1236
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1237define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
1238; CHECK-LABEL: @avx512_psrl_d_512_64(
1239; CHECK-NEXT:    ret <16 x i32> zeroinitializer
1240;
1241  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1242  ret <16 x i32> %1
1243}
1244
; Zero count vector: the logical right shift is expected to fold away, returning %v unchanged.
1245define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
1246; CHECK-LABEL: @avx512_psrl_q_512_0(
1247; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
1248;
1249  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
1250  ret <8 x i64> %1
1251}
1252
; Count <15, 9999>: only element 0 matters; expected to fold to a plain lshr by splat 15.
1253define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
1254; CHECK-LABEL: @avx512_psrl_q_512_15(
1255; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
1256; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
1257;
1258  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1259  ret <8 x i64> %1
1260}
1261
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1262define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
1263; CHECK-LABEL: @avx512_psrl_q_512_64(
1264; CHECK-NEXT:    ret <8 x i64> zeroinitializer
1265;
1266  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1267  ret <8 x i64> %1
1268}
1269
1270;
1271; SHL - Constant Vector
1272;
1273
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1274define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
1275; CHECK-LABEL: @sse2_psll_w_0(
1276; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
1277;
1278  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
1279  ret <8 x i16> %1
1280}
1281
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, shl by splat 15.
1282define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
1283; CHECK-LABEL: @sse2_psll_w_15(
1284; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
1285; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1286;
1287  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1288  ret <8 x i16> %1
1289}
1290
; All eight count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
1291define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
1292; CHECK-LABEL: @sse2_psll_w_15_splat(
1293; CHECK-NEXT:    ret <8 x i16> zeroinitializer
1294;
1295  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1296  ret <8 x i16> %1
1297}
1298
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1299define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
1300; CHECK-LABEL: @sse2_psll_w_64(
1301; CHECK-NEXT:    ret <8 x i16> zeroinitializer
1302;
1303  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1304  ret <8 x i16> %1
1305}
1306
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1307define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
1308; CHECK-LABEL: @sse2_psll_d_0(
1309; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
1310;
1311  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1312  ret <4 x i32> %1
1313}
1314
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, shl by splat 15.
1315define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
1316; CHECK-LABEL: @sse2_psll_d_15(
1317; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
1318; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1319;
1320  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1321  ret <4 x i32> %1
1322}
1323
; All four count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1324define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
1325; CHECK-LABEL: @sse2_psll_d_15_splat(
1326; CHECK-NEXT:    ret <4 x i32> zeroinitializer
1327;
1328  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1329  ret <4 x i32> %1
1330}
1331
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1332define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
1333; CHECK-LABEL: @sse2_psll_d_64(
1334; CHECK-NEXT:    ret <4 x i32> zeroinitializer
1335;
1336  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1337  ret <4 x i32> %1
1338}
1339
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1340define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
1341; CHECK-LABEL: @sse2_psll_q_0(
1342; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
1343;
1344  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1345  ret <2 x i64> %1
1346}
1347
; Count <15, 9999>: only element 0 matters; expected to fold to a plain shl by splat 15.
1348define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
1349; CHECK-LABEL: @sse2_psll_q_15(
1350; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
1351; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1352;
1353  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1354  ret <2 x i64> %1
1355}
1356
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1357define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
1358; CHECK-LABEL: @sse2_psll_q_64(
1359; CHECK-NEXT:    ret <2 x i64> zeroinitializer
1360;
1361  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1362  ret <2 x i64> %1
1363}
1364
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1365define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
1366; CHECK-LABEL: @avx2_psll_w_0(
1367; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
1368;
1369  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
1370  ret <16 x i16> %1
1371}
1372
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, shl by splat 15.
1373define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
1374; CHECK-LABEL: @avx2_psll_w_15(
1375; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
1376; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
1377;
1378  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1379  ret <16 x i16> %1
1380}
1381
; All eight count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
1382define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
1383; CHECK-LABEL: @avx2_psll_w_15_splat(
1384; CHECK-NEXT:    ret <16 x i16> zeroinitializer
1385;
1386  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1387  ret <16 x i16> %1
1388}
1389
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1390define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
1391; CHECK-LABEL: @avx2_psll_w_64(
1392; CHECK-NEXT:    ret <16 x i16> zeroinitializer
1393;
1394  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1395  ret <16 x i16> %1
1396}
1397
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1398define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
1399; CHECK-LABEL: @avx2_psll_d_0(
1400; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
1401;
1402  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
1403  ret <8 x i32> %1
1404}
1405
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, shl by splat 15.
1406define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
1407; CHECK-LABEL: @avx2_psll_d_15(
1408; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
1409; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1410;
1411  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1412  ret <8 x i32> %1
1413}
1414
; All four count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1415define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
1416; CHECK-LABEL: @avx2_psll_d_15_splat(
1417; CHECK-NEXT:    ret <8 x i32> zeroinitializer
1418;
1419  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1420  ret <8 x i32> %1
1421}
1422
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1423define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
1424; CHECK-LABEL: @avx2_psll_d_64(
1425; CHECK-NEXT:    ret <8 x i32> zeroinitializer
1426;
1427  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1428  ret <8 x i32> %1
1429}
1430
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1431define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
1432; CHECK-LABEL: @avx2_psll_q_0(
1433; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
1434;
1435  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
1436  ret <4 x i64> %1
1437}
1438
; Count <15, 9999>: only element 0 matters; expected to fold to a plain shl by splat 15.
1439define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
1440; CHECK-LABEL: @avx2_psll_q_15(
1441; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
1442; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1443;
1444  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1445  ret <4 x i64> %1
1446}
1447
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1448define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
1449; CHECK-LABEL: @avx2_psll_q_64(
1450; CHECK-NEXT:    ret <4 x i64> zeroinitializer
1451;
1452  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1453  ret <4 x i64> %1
1454}
1455
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1456define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
1457; CHECK-LABEL: @avx512_psll_w_512_0(
1458; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
1459;
1460  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
1461  ret <32 x i16> %1
1462}
1463
; Count <15, 0, 0, 0, 9999 x4>: the upper four elements do not affect the expected result, shl by splat 15.
1464define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
1465; CHECK-LABEL: @avx512_psll_w_512_15(
1466; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
1467; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
1468;
1469  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1470  ret <32 x i16> %1
1471}
1472
; All eight count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably the low elements combine into one out-of-range 64-bit count - confirm.
; NOTE(review): the name puts "15" before "512", unlike @avx512_psrl_w_512_15_splat; consider renaming for consistency.
1473define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) {
1474; CHECK-LABEL: @avx512_psll_w_15_512_splat(
1475; CHECK-NEXT:    ret <32 x i16> zeroinitializer
1476;
1477  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1478  ret <32 x i16> %1
1479}
1480
; Count element 0 is 64 (larger than the 16-bit element width): expected result is all zeros.
1481define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
1482; CHECK-LABEL: @avx512_psll_w_512_64(
1483; CHECK-NEXT:    ret <32 x i16> zeroinitializer
1484;
1485  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
1486  ret <32 x i16> %1
1487}
1488
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1489define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
1490; CHECK-LABEL: @avx512_psll_d_512_0(
1491; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
1492;
1493  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
1494  ret <16 x i32> %1
1495}
1496
; Count <15, 0, 9999, 9999>: the upper two elements do not affect the expected result, shl by splat 15.
1497define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
1498; CHECK-LABEL: @avx512_psll_d_512_15(
1499; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
1500; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
1501;
1502  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
1503  ret <16 x i32> %1
1504}
1505
; All four count elements are 15, yet the expected result is all zeros rather than shl by 15.
; NOTE(review): presumably elements 0 and 1 together form one out-of-range 64-bit count - confirm.
1506define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
1507; CHECK-LABEL: @avx512_psll_d_512_15_splat(
1508; CHECK-NEXT:    ret <16 x i32> zeroinitializer
1509;
1510  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1511  ret <16 x i32> %1
1512}
1513
; Count element 0 is 64 (larger than the 32-bit element width): expected result is all zeros.
1514define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
1515; CHECK-LABEL: @avx512_psll_d_512_64(
1516; CHECK-NEXT:    ret <16 x i32> zeroinitializer
1517;
1518  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
1519  ret <16 x i32> %1
1520}
1521
; Zero count vector: the left shift is expected to fold away, returning %v unchanged.
1522define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
1523; CHECK-LABEL: @avx512_psll_q_512_0(
1524; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
1525;
1526  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
1527  ret <8 x i64> %1
1528}
1529
; Count <15, 9999>: only element 0 matters; expected to fold to a plain shl by splat 15.
1530define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
1531; CHECK-LABEL: @avx512_psll_q_512_15(
1532; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
1533; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
1534;
1535  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
1536  ret <8 x i64> %1
1537}
1538
; Count element 0 is 64 (equal to the element width): expected result is all zeros.
1539define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
1540; CHECK-LABEL: @avx512_psll_q_512_64(
1541; CHECK-NEXT:    ret <8 x i64> zeroinitializer
1542;
1543  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1544  ret <8 x i64> %1
1545}
1546
1547;
1548; ASHR - Constant Per-Element Vector
1549;
1550
; All per-element counts are zero: the call is expected to fold away, returning %v unchanged.
1551define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
1552; CHECK-LABEL: @avx2_psrav_d_128_0(
1553; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
1554;
1555  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1556  ret <4 x i32> %1
1557}
1558
; All per-element counts are zero: the call is expected to fold away, returning %v unchanged.
1559define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
1560; CHECK-LABEL: @avx2_psrav_d_256_0(
1561; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
1562;
1563  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
1564  ret <8 x i32> %1
1565}
1566
; All per-element counts are zero: the call is expected to fold away, returning %v unchanged.
1567define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
1568; CHECK-LABEL: @avx512_psrav_d_512_0(
1569; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
1570;
1571  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
1572  ret <16 x i32> %1
1573}
1574
; Mixed per-element counts: in-range values are kept, the out-of-range 64 is expected to become 31.
1575define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
1576; CHECK-LABEL: @avx2_psrav_d_128_var(
1577; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
1578; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1579;
1580  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1581  ret <4 x i32> %1
1582}
1583
; Mixed per-element counts: in-range values are kept, the out-of-range 32 is expected to become 31.
1584define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
1585; CHECK-LABEL: @avx2_psrav_d_256_var(
1586; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1587; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1588;
1589  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
1590  ret <8 x i32> %1
1591}
1592
; Mixed per-element counts: in-range values are kept, each out-of-range 32 is expected to become 31.
1593define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
1594; CHECK-LABEL: @avx512_psrav_d_512_var(
1595; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1596; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
1597;
1598  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
1599  ret <16 x i32> %1
1600}
1601
; Every defined count is out of range (including a negative one): each is expected to become 31, while the undef element stays undef.
1602define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
1603; CHECK-LABEL: @avx2_psrav_d_128_allbig(
1604; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 undef>
1605; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1606;
1607  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
1608  ret <4 x i32> %1
1609}
1610
; Every defined count is out of range (positive or negative): each is expected to become 31, while the undef element stays undef.
1611define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
1612; CHECK-LABEL: @avx2_psrav_d_256_allbig(
1613; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1614; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1615;
1616  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1617  ret <8 x i32> %1
1618}
1619
; Every defined count is out of range (positive or negative): each is expected to become 31, while undef elements stay undef.
1620define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
1621; CHECK-LABEL: @avx512_psrav_d_512_allbig(
1622; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1623; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
1624;
1625  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1626  ret <16 x i32> %1
1627}
1628
; Count vector built by inserting undef at index 0: the undef lane is expected to be preserved and the out-of-range 64 to become 31.
1629define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
1630; CHECK-LABEL: @avx2_psrav_d_128_undef(
1631; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
1632; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1633;
1634  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
1635  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
1636  ret <4 x i32> %2
1637}
1638
; Count vector built by inserting undef at index 1: the undef lane is expected to be preserved and the out-of-range 32 to become 31.
1639define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
1640; CHECK-LABEL: @avx2_psrav_d_256_undef(
1641; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1642; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1643;
1644  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
1645  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
1646  ret <8 x i32> %2
1647}
1648
; Count vector built by inserting undef at index 1: the undef lane is expected to be preserved and each out-of-range 32 to become 31.
1649define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
1650; CHECK-LABEL: @avx512_psrav_d_512_undef(
1651; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1652; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
1653;
1654  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
1655  %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
1656  ret <16 x i32> %2
1657}
1658
; All per-element counts are zero: the call is expected to fold away, returning %v unchanged.
1659define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
1660; CHECK-LABEL: @avx512_psrav_q_128_0(
1661; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
1662;
1663  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
1664  ret <2 x i64> %1
1665}
1666
; All per-element counts are zero: the call is expected to fold away, returning %v unchanged.
1667define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
1668; CHECK-LABEL: @avx512_psrav_q_256_0(
1669; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
1670;
1671  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
1672  ret <4 x i64> %1
1673}
1674
; Per-element counts <0, 8> are all in range: expected to become an ashr with the same count vector.
1675define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
1676; CHECK-LABEL: @avx512_psrav_q_128_var(
1677; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 0, i64 8>
1678; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1679;
1680  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
1681  ret <2 x i64> %1
1682}
1683
; Per-element counts <0, 8, 16, 31> are all in range: expected to become an ashr with the same count vector.
1684define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
1685; CHECK-LABEL: @avx512_psrav_q_256_var(
1686; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
1687; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1688;
1689  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
1690  ret <4 x i64> %1
1691}
1692
; The only defined count (64) is out of range and is expected to become 63; the undef element stays undef.
1693define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
1694; CHECK-LABEL: @avx512_psrav_q_128_allbig(
1695; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 undef>
1696; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1697;
1698  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
1699  ret <2 x i64> %1
1700}
1701
; Every defined count is out of range (64 and two negative values): each is expected to become 63, while the undef element stays undef.
1702define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
1703; CHECK-LABEL: @avx512_psrav_q_256_allbig(
1704; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63>
1705; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1706;
1707  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
1708  ret <4 x i64> %1
1709}
1710
; Count vector built by inserting undef at index 0: expected ashr by <undef, 8>, with the undef lane preserved.
1711define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
1712; CHECK-LABEL: @avx512_psrav_q_128_undef(
1713; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 undef, i64 8>
1714; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1715;
1716  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
1717  %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
1718  ret <2 x i64> %2
1719}
1720
; psrav.q.256 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an ashr whose amount vector carries the undef lane through.
1721define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
1722; CHECK-LABEL: @avx512_psrav_q_256_undef(
1723; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31>
1724; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1725;
1726  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1727  %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
1728  ret <4 x i64> %2
1729}
1730
; psrav.q.512 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1731define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
1732; CHECK-LABEL: @avx512_psrav_q_512_0(
1733; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
1734;
1735  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
1736  ret <8 x i64> %1
1737}
1738
; psrav.q.512 with constant per-lane amounts that are all below 64: expected to become a plain ashr by the same vector.
1739define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
1740; CHECK-LABEL: @avx512_psrav_q_512_var(
1741; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
1742; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
1743;
1744  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
1745  ret <8 x i64> %1
1746}
1747
; psrav.q.512 where every defined amount is out of range: expected output is an ashr with those lanes clamped
; to 63 and the undef lanes kept as undef.
1748define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
1749; CHECK-LABEL: @avx512_psrav_q_512_allbig(
1750; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
1751; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
1752;
1753  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
1754  ret <8 x i64> %1
1755}
1756
; psrav.q.512 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an ashr whose amount vector carries the undef lane through.
1757define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
1758; CHECK-LABEL: @avx512_psrav_q_512_undef(
1759; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
1760; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
1761;
1762  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1763  %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
1764  ret <8 x i64> %2
1765}
1766
; psrav.w.128 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1767define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
1768; CHECK-LABEL: @avx512_psrav_w_128_0(
1769; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
1770;
1771  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
1772  ret <8 x i16> %1
1773}
1774
; psrav.w.128 with constant per-lane amounts that are all below 16: expected to become a plain ashr by the same vector.
1775define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
1776; CHECK-LABEL: @avx512_psrav_w_128_var(
1777; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
1778; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1779;
1780  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
1781  ret <8 x i16> %1
1782}
1783
; psrav.w.128 where every defined amount is out of range for i16: expected output is an ashr with those lanes
; clamped to 15 and the undef lane kept as undef.
1784define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
1785; CHECK-LABEL: @avx512_psrav_w_128_allbig(
1786; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
1787; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1788;
1789  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
1790  ret <8 x i16> %1
1791}
1792
; psrav.w.128 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an ashr whose amount vector carries the undef lane through.
1793define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
1794; CHECK-LABEL: @avx512_psrav_w_128_undef(
1795; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
1796; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1797;
1798  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
1799  %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
1800  ret <8 x i16> %2
1801}
1802
; psrav.w.256 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1803define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
1804; CHECK-LABEL: @avx512_psrav_w_256_0(
1805; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
1806;
1807  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
1808  ret <16 x i16> %1
1809}
1810
; psrav.w.256 with constant per-lane amounts 0..15 (all in range): expected to become a plain ashr by the same vector.
1811define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
1812; CHECK-LABEL: @avx512_psrav_w_256_var(
1813; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
1814; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
1815;
1816  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1817  ret <16 x i16> %1
1818}
1819
; psrav.w.256 where every defined amount is out of range for i16: expected output is an ashr with those lanes
; clamped to 15 and the undef lane kept as undef.
1820define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
1821; CHECK-LABEL: @avx512_psrav_w_256_allbig(
1822; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
1823; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
1824;
1825  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
1826  ret <16 x i16> %1
1827}
1828
; psrav.w.256 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an ashr whose amount vector carries the undef lane through.
1829define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
1830; CHECK-LABEL: @avx512_psrav_w_256_undef(
1831; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
1832; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
1833;
1834  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
1835  %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
1836  ret <16 x i16> %2
1837}
1838
; psrav.w.512 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1839define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
1840; CHECK-LABEL: @avx512_psrav_w_512_0(
1841; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
1842;
1843  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
1844  ret <32 x i16> %1
1845}
1846
; psrav.w.512 with constant per-lane amounts 0..15 then 15..0 (all in range): expected to become a plain ashr by the same vector.
1847define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
1848; CHECK-LABEL: @avx512_psrav_w_512_var(
1849; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
1850; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
1851;
1852  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
1853  ret <32 x i16> %1
1854}
1855
; psrav.w.512 where every defined amount is out of range for i16: expected output is an ashr with those lanes
; clamped to 15 and the four undef lanes kept as undef.
1856define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
1857; CHECK-LABEL: @avx512_psrav_w_512_allbig(
1858; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
1859; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
1860;
1861  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
1862  ret <32 x i16> %1
1863}
1864
; psrav.w.512 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an ashr whose amount vector carries the undef lane through.
1865define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
1866; CHECK-LABEL: @avx512_psrav_w_512_undef(
1867; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
1868; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
1869;
1870  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
1871  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
1872  ret <32 x i16> %2
1873}
1874
1875;
1876; LSHR - Constant Per-Element Vector
1877;
1878
; psrlv.d (128-bit) with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1879define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
1880; CHECK-LABEL: @avx2_psrlv_d_128_0(
1881; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
1882;
1883  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1884  ret <4 x i32> %1
1885}
1886
; psrlv.d.256 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1887define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
1888; CHECK-LABEL: @avx2_psrlv_d_256_0(
1889; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
1890;
1891  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
1892  ret <8 x i32> %1
1893}
1894
; psrlv.d (128-bit) with constant per-lane amounts that are all below 32: expected to become a plain lshr by the same vector.
1895define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
1896; CHECK-LABEL: @avx2_psrlv_d_128_var(
1897; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
1898; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1899;
1900  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
1901  ret <4 x i32> %1
1902}
1903
; psrlv.d.256 with constant per-lane amounts that are all below 32: expected to become a plain lshr by the same vector.
1904define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
1905; CHECK-LABEL: @avx2_psrlv_d_256_var(
1906; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1907; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1908;
1909  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
1910  ret <8 x i32> %1
1911}
1912
; psrlv.d (128-bit) with a mix of in-range amounts and one out-of-range amount (64): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
1913define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
1914; CHECK-LABEL: @avx2_psrlv_d_128_big(
1915; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1916; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1917;
1918  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1919  ret <4 x i32> %1
1920}
1921
; psrlv.d.256 with a mix of in-range amounts and one out-of-range amount (64): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
1922define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
1923; CHECK-LABEL: @avx2_psrlv_d_256_big(
1924; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1925; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1926;
1927  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1928  ret <8 x i32> %1
1929}
1930
; psrlv.d (128-bit) where every defined amount is out of range for i32: expected to fold to a constant that is
; zero in those lanes and undef where the amount was undef.
1931define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
1932; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
1933; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
1934;
1935  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
1936  ret <4 x i32> %1
1937}
1938
; psrlv.d.256 where every defined amount is out of range for i32: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
1939define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
1940; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
1941; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1942;
1943  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1944  ret <8 x i32> %1
1945}
1946
; psrlv.d (128-bit) where lane 0 of the amount vector is overwritten with undef via insertelement: expected to
; fold to an lshr whose amount vector carries the undef lane through.
1947define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
1948; CHECK-LABEL: @avx2_psrlv_d_128_undef(
1949; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
1950; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1951;
1952  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
1953  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
1954  ret <4 x i32> %2
1955}
1956
; psrlv.d.256 where lane 1 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
1957define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
1958; CHECK-LABEL: @avx2_psrlv_d_256_undef(
1959; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
1960; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1961;
1962  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
1963  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
1964  ret <8 x i32> %2
1965}
1966
; psrlv.q (128-bit) with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1967define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
1968; CHECK-LABEL: @avx2_psrlv_q_128_0(
1969; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
1970;
1971  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1972  ret <2 x i64> %1
1973}
1974
; psrlv.q.256 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
1975define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
1976; CHECK-LABEL: @avx2_psrlv_q_256_0(
1977; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
1978;
1979  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
1980  ret <4 x i64> %1
1981}
1982
; psrlv.q (128-bit) with constant per-lane amounts that are all below 64: expected to become a plain lshr by the same vector.
1983define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
1984; CHECK-LABEL: @avx2_psrlv_q_128_var(
1985; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 0, i64 8>
1986; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1987;
1988  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
1989  ret <2 x i64> %1
1990}
1991
; psrlv.q.256 with constant per-lane amounts that are all below 64: expected to become a plain lshr by the same vector.
1992define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
1993; CHECK-LABEL: @avx2_psrlv_q_256_var(
1994; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
1995; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1996;
1997  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
1998  ret <4 x i64> %1
1999}
2000
; psrlv.q (128-bit) with one in-range amount (0) and one out-of-range amount (128): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
2001define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
2002; CHECK-LABEL: @avx2_psrlv_q_128_big(
2003; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
2004; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
2005;
2006  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
2007  ret <2 x i64> %1
2008}
2009
; psrlv.q.256 with a mix of in-range amounts and one out-of-range amount (64): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
2010define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
2011; CHECK-LABEL: @avx2_psrlv_q_256_big(
2012; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2013; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
2014;
2015  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2016  ret <4 x i64> %1
2017}
2018
; psrlv.q (128-bit) where both amounts (128 and -64) are out of range for i64: expected to fold to an all-zero vector.
2019define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
2020; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
2021; CHECK-NEXT:    ret <2 x i64> zeroinitializer
2022;
2023  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
2024  ret <2 x i64> %1
2025}
2026
; psrlv.q.256 where every defined amount is out of range for i64: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2027define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
2028; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
2029; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
2030;
2031  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
2032  ret <4 x i64> %1
2033}
2034
2035; In the 128-bit case below, lane 0 shifts by 0 and lane 1 is poison (which may be treated as 0), so the call folds to the unshifted input.
2036
; psrlv.q (128-bit) with amount vector <0, poison> (lane 1 overwritten via insertelement): expected to fold to
; returning %v unchanged.
2037define <2 x i64> @avx2_psrlv_q_128_poison(<2 x i64> %v) {
2038; CHECK-LABEL: @avx2_psrlv_q_128_poison(
2039; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
2040;
2041  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
2042  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
2043  ret <2 x i64> %2
2044}
2045
; psrlv.q.256 where lane 0 of the amount vector is overwritten with poison via insertelement: expected to fold to
; an lshr whose amount vector carries the poison lane through.
2046define <4 x i64> @avx2_psrlv_q_256_poison(<4 x i64> %v) {
2047; CHECK-LABEL: @avx2_psrlv_q_256_poison(
2048; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
2049; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
2050;
2051  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
2052  %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
2053  ret <4 x i64> %2
2054}
2055
; psrlv.d.512 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
; NOTE(review): the function name says "avx2" but the intrinsic called is llvm.x86.avx512.psrlv.d.512, and the
; other 512-bit psrlv tests in this file use the "avx512_" prefix. Consider renaming (and regenerating the checks).
2056define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) {
2057; CHECK-LABEL: @avx2_psrlv_d_512_0(
2058; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
2059;
2060  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
2061  ret <16 x i32> %1
2062}
2063
; psrlv.d.512 with constant per-lane amounts that are all below 32: expected to become a plain lshr by the same vector.
2064define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
2065; CHECK-LABEL: @avx512_psrlv_d_512_var(
2066; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2067; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2068;
2069  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2070  ret <16 x i32> %1
2071}
2072
; psrlv.d.512 with a mix of in-range amounts and two out-of-range amounts (64): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
2073define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
2074; CHECK-LABEL: @avx512_psrlv_d_512_big(
2075; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2076; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2077;
2078  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2079  ret <16 x i32> %1
2080}
2081
; psrlv.d.512 where every defined amount is out of range for i32: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2082define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
2083; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
2084; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2085;
2086  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2087  ret <16 x i32> %1
2088}
2089
; psrlv.d.512 where lane 1 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
2090define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
2091; CHECK-LABEL: @avx512_psrlv_d_512_undef(
2092; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2093; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2094;
2095  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2096  %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
2097  ret <16 x i32> %2
2098}
2099
; psrlv.q.512 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2100define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
2101; CHECK-LABEL: @avx512_psrlv_q_512_0(
2102; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
2103;
2104  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
2105  ret <8 x i64> %1
2106}
2107
; psrlv.q.512 with constant per-lane amounts that are all below 64: expected to become a plain lshr by the same vector.
2108define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
2109; CHECK-LABEL: @avx512_psrlv_q_512_var(
2110; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2111; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2112;
2113  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
2114  ret <8 x i64> %1
2115}
2116
; psrlv.q.512 with a mix of in-range amounts and two out-of-range amounts (64): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
2117define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
2118; CHECK-LABEL: @avx512_psrlv_q_512_big(
2119; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2120; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2121;
2122  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2123  ret <8 x i64> %1
2124}
2125
; psrlv.q.512 where every defined amount is out of range for i64: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2126define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
2127; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
2128; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
2129;
2130  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
2131  ret <8 x i64> %1
2132}
2133
; psrlv.q.512 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
2134define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
2135; CHECK-LABEL: @avx512_psrlv_q_512_undef(
2136; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2137; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2138;
2139  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
2140  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
2141  ret <8 x i64> %2
2142}
2143
; psrlv.w.128 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2144define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
2145; CHECK-LABEL: @avx512_psrlv_w_128_0(
2146; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
2147;
2148  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
2149  ret <8 x i16> %1
2150}
2151
; psrlv.w.128 with constant per-lane amounts that are all below 16: expected to become a plain lshr by the same vector.
2152define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
2153; CHECK-LABEL: @avx512_psrlv_w_128_var(
2154; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2155; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2156;
2157  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
2158  ret <8 x i16> %1
2159}
2160
; psrlv.w.128 with a mix of in-range amounts and one out-of-range amount (16): the expected output keeps the
; intrinsic call unchanged rather than converting it to lshr.
2161define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
2162; CHECK-LABEL: @avx512_psrlv_w_128_big(
2163; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2164; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2165;
2166  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2167  ret <8 x i16> %1
2168}
2169
; psrlv.w.128 where every defined amount is out of range for i16: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2170define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
2171; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
2172; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
2173;
2174  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
2175  ret <8 x i16> %1
2176}
2177
; psrlv.w.128 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
2178define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
2179; CHECK-LABEL: @avx512_psrlv_w_128_undef(
2180; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2181; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2182;
2183  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
2184  %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
2185  ret <8 x i16> %2
2186}
2187
; psrlv.w.256 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2188define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
2189; CHECK-LABEL: @avx512_psrlv_w_256_0(
2190; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
2191;
2192  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
2193  ret <16 x i16> %1
2194}
2195
; psrlv.w.256 with constant per-lane amounts 0..15 (all in range): expected to become a plain lshr by the same vector.
2196define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
2197; CHECK-LABEL: @avx512_psrlv_w_256_var(
2198; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2199; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2200;
2201  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
2202  ret <16 x i16> %1
2203}
2204
; psrlv.w.256 with a mix of in-range amounts and one out-of-range amount (16, last lane): the expected output
; keeps the intrinsic call unchanged rather than converting it to lshr.
2205define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
2206; CHECK-LABEL: @avx512_psrlv_w_256_big(
2207; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2208; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2209;
2210  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2211  ret <16 x i16> %1
2212}
2213
; psrlv.w.256 where every defined amount is out of range for i16: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2214define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
2215; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
2216; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
2217;
2218  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
2219  ret <16 x i16> %1
2220}
2221
; psrlv.w.256 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
2222define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
2223; CHECK-LABEL: @avx512_psrlv_w_256_undef(
2224; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2225; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2226;
2227  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
2228  %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
2229  ret <16 x i16> %2
2230}
2231
; psrlv.w.512 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2232define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
2233; CHECK-LABEL: @avx512_psrlv_w_512_0(
2234; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
2235;
2236  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
2237  ret <32 x i16> %1
2238}
2239
; psrlv.w.512 with constant per-lane amounts 0..15 then 15..0 (all in range): expected to become a plain lshr by the same vector.
2240define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
2241; CHECK-LABEL: @avx512_psrlv_w_512_var(
2242; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2243; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2244;
2245  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2246  ret <32 x i16> %1
2247}
2248
; psrlv.w.512 with a mix of in-range amounts and one out-of-range amount (16, lane 16): the expected output keeps
; the intrinsic call unchanged rather than converting it to lshr.
2249define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
2250; CHECK-LABEL: @avx512_psrlv_w_512_big(
2251; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2252; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2253;
2254  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2255  ret <32 x i16> %1
2256}
2257
; psrlv.w.512 where every defined amount is out of range for i16: expected to fold to a constant that is zero in
; those lanes and undef where the amount was undef.
2258define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
2259; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
2260; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
2261;
2262  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
2263  ret <32 x i16> %1
2264}
2265
; psrlv.w.512 where lane 0 of the amount vector is overwritten with undef via insertelement: expected to fold to
; an lshr whose amount vector carries the undef lane through.
2266define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
2267; CHECK-LABEL: @avx512_psrlv_w_512_undef(
2268; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2269; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2270;
2271  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
2272  %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
2273  ret <32 x i16> %2
2274}
2275
2276;
2277; SHL - Constant Per-Element Vector
2278;
2279
; psllv.d (128-bit) with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2280define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
2281; CHECK-LABEL: @avx2_psllv_d_128_0(
2282; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
2283;
2284  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
2285  ret <4 x i32> %1
2286}
2287
; psllv.d.256 with an all-zero shift-amount vector: the call should fold away and return %v unchanged.
2288define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
2289; CHECK-LABEL: @avx2_psllv_d_256_0(
2290; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
2291;
2292  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
2293  ret <8 x i32> %1
2294}
2295
; psllv.d (128-bit) with constant per-lane amounts that are all below 32: expected to become a plain shl by the same vector.
2296define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
2297; CHECK-LABEL: @avx2_psllv_d_128_var(
2298; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
2299; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2300;
2301  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
2302  ret <4 x i32> %1
2303}
2304
; psllv.d.256 with constant per-lane amounts that are all below 32: expected to become a plain shl by the same vector.
2305define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
2306; CHECK-LABEL: @avx2_psllv_d_256_var(
2307; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2308; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
2309;
2310  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2311  ret <8 x i32> %1
2312}
2313
; One lane's shift amount (64) exceeds 31 while the others are in range: the expected output keeps the intrinsic call.
2314define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
2315; CHECK-LABEL: @avx2_psllv_d_128_big(
2316; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
2317; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2318;
2319  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
2320  ret <4 x i32> %1
2321}
2322
; One lane's shift amount (64) exceeds 31 while the others are in range: the expected output keeps the intrinsic call.
2323define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
2324; CHECK-LABEL: @avx2_psllv_d_256_big(
2325; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2326; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
2327;
2328  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2329  ret <8 x i32> %1
2330}
2331
; Every defined shift amount lies outside 0..31: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2332define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
2333; CHECK-LABEL: @avx2_psllv_d_128_allbig(
2334; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
2335;
2336  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
2337  ret <4 x i32> %1
2338}
2339
; Every defined shift amount lies outside 0..31: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2340define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
2341; CHECK-LABEL: @avx2_psllv_d_256_allbig(
2342; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2343;
2344  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2345  ret <8 x i32> %1
2346}
2347
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2348define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
2349; CHECK-LABEL: @avx2_psllv_d_128_undef(
2350; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
2351; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2352;
2353  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
2354  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
2355  ret <4 x i32> %2
2356}
2357
; Lane 1 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2358define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
2359; CHECK-LABEL: @avx2_psllv_d_256_undef(
2360; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2361; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
2362;
2363  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2364  %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
2365  ret <8 x i32> %2
2366}
2367
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2368define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
2369; CHECK-LABEL: @avx2_psllv_q_128_0(
2370; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
2371;
2372  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
2373  ret <2 x i64> %1
2374}
2375
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2376define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
2377; CHECK-LABEL: @avx2_psllv_q_256_0(
2378; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
2379;
2380  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
2381  ret <4 x i64> %1
2382}
2383
; Every constant shift amount is within 0..63: the expected output is a generic shl with the same amounts.
2384define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
2385; CHECK-LABEL: @avx2_psllv_q_128_var(
2386; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 0, i64 8>
2387; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
2388;
2389  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
2390  ret <2 x i64> %1
2391}
2392
; Every constant shift amount is within 0..63: the expected output is a generic shl with the same amounts.
2393define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
2394; CHECK-LABEL: @avx2_psllv_q_256_var(
2395; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
2396; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
2397;
2398  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
2399  ret <4 x i64> %1
2400}
2401
; One lane's shift amount (128) exceeds 63 while the other is in range: the expected output keeps the intrinsic call.
2402define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
2403; CHECK-LABEL: @avx2_psllv_q_128_big(
2404; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
2405; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
2406;
2407  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
2408  ret <2 x i64> %1
2409}
2410
; One lane's shift amount (64) exceeds 63 while the others are in range: the expected output keeps the intrinsic call.
2411define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
2412; CHECK-LABEL: @avx2_psllv_q_256_big(
2413; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2414; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
2415;
2416  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2417  ret <4 x i64> %1
2418}
2419
; Both shift amounts lie outside 0..63: the expected output is an all-zero constant.
2420define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
2421; CHECK-LABEL: @avx2_psllv_q_128_allbig(
2422; CHECK-NEXT:    ret <2 x i64> zeroinitializer
2423;
2424  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
2425  ret <2 x i64> %1
2426}
2427
; Every defined shift amount lies outside 0..63: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2428define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
2429; CHECK-LABEL: @avx2_psllv_q_256_allbig(
2430; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
2431;
2432  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
2433  ret <4 x i64> %1
2434}
2435
2436; Lane 0 shifts by 0 and lane 1's shift amount is poison (so it may be treated as 0), so we return the unshifted input.
2437
; Shift-amount vector is <0, poison> after the insertelement: the expected output returns %v unchanged.
2438define <2 x i64> @avx2_psllv_q_128_poison(<2 x i64> %v) {
2439; CHECK-LABEL: @avx2_psllv_q_128_poison(
2440; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
2441;
2442  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
2443  %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
2444  ret <2 x i64> %2
2445}
2446
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with poison: the expected output is a generic shl that keeps the poison lane.
2447define <4 x i64> @avx2_psllv_q_256_poison(<4 x i64> %v) {
2448; CHECK-LABEL: @avx2_psllv_q_256_poison(
2449; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
2450; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
2451;
2452  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
2453  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
2454  ret <4 x i64> %2
2455}
2456
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2457define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
2458; CHECK-LABEL: @avx512_psllv_d_512_0(
2459; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
2460;
2461  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
2462  ret <16 x i32> %1
2463}
2464
; Every constant shift amount is within 0..31: the expected output is a generic shl with the same amounts.
2465define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
2466; CHECK-LABEL: @avx512_psllv_d_512_var(
2467; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2468; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2469;
2470  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2471  ret <16 x i32> %1
2472}
2473
; Two lanes use a shift amount (64) that exceeds 31 while the others are in range: the expected output keeps the intrinsic call.
2474define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
2475; CHECK-LABEL: @avx512_psllv_d_512_big(
2476; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2477; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2478;
2479  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2480  ret <16 x i32> %1
2481}
2482
; Every defined shift amount lies outside 0..31: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2483define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
2484; CHECK-LABEL: @avx512_psllv_d_512_allbig(
2485; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2486;
2487  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2488  ret <16 x i32> %1
2489}
2490
; Lane 1 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2491define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
2492; CHECK-LABEL: @avx512_psllv_d_512_undef(
2493; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2494; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
2495;
2496  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2497  %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
2498  ret <16 x i32> %2
2499}
2500
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2501define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
2502; CHECK-LABEL: @avx512_psllv_q_512_0(
2503; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
2504;
2505  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
2506  ret <8 x i64> %1
2507}
2508
; Every constant shift amount is within 0..63: the expected output is a generic shl with the same amounts.
2509define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
2510; CHECK-LABEL: @avx512_psllv_q_512_var(
2511; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2512; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2513;
2514  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
2515  ret <8 x i64> %1
2516}
2517
; Two lanes use a shift amount (64) that exceeds 63 while the others are in range: the expected output keeps the intrinsic call.
2518define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
2519; CHECK-LABEL: @avx512_psllv_q_512_big(
2520; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2521; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2522;
2523  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2524  ret <8 x i64> %1
2525}
2526
; Every defined shift amount lies outside 0..63: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2527define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
2528; CHECK-LABEL: @avx512_psllv_q_512_allbig(
2529; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
2530;
2531  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
2532  ret <8 x i64> %1
2533}
2534
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2535define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
2536; CHECK-LABEL: @avx512_psllv_q_512_undef(
2537; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2538; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
2539;
2540  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
2541  %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
2542  ret <8 x i64> %2
2543}
2544
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2545define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
2546; CHECK-LABEL: @avx512_psllv_w_128_0(
2547; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
2548;
2549  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
2550  ret <8 x i16> %1
2551}
2552
; Every constant shift amount is within 0..15: the expected output is a generic shl with the same amounts.
2553define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
2554; CHECK-LABEL: @avx512_psllv_w_128_var(
2555; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2556; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2557;
2558  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
2559  ret <8 x i16> %1
2560}
2561
; One lane's shift amount (16) exceeds 15 while the others are in range: the expected output keeps the intrinsic call.
2562define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
2563; CHECK-LABEL: @avx512_psllv_w_128_big(
2564; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2565; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2566;
2567  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2568  ret <8 x i16> %1
2569}
2570
; Every defined shift amount lies outside 0..15: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2571define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
2572; CHECK-LABEL: @avx512_psllv_w_128_allbig(
2573; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
2574;
2575  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
2576  ret <8 x i16> %1
2577}
2578
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2579define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
2580; CHECK-LABEL: @avx512_psllv_w_128_undef(
2581; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2582; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
2583;
2584  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
2585  %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
2586  ret <8 x i16> %2
2587}
2588
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2589define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
2590; CHECK-LABEL: @avx512_psllv_w_256_0(
2591; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
2592;
2593  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
2594  ret <16 x i16> %1
2595}
2596
; Every constant shift amount is within 0..15: the expected output is a generic shl with the same amounts.
2597define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
2598; CHECK-LABEL: @avx512_psllv_w_256_var(
2599; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2600; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2601;
2602  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
2603  ret <16 x i16> %1
2604}
2605
; One lane's shift amount (16) exceeds 15 while the others are in range: the expected output keeps the intrinsic call.
2606define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
2607; CHECK-LABEL: @avx512_psllv_w_256_big(
2608; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2609; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2610;
2611  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2612  ret <16 x i16> %1
2613}
2614
; Every defined shift amount lies outside 0..15: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2615define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
2616; CHECK-LABEL: @avx512_psllv_w_256_allbig(
2617; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
2618;
2619  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
2620  ret <16 x i16> %1
2621}
2622
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2623define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
2624; CHECK-LABEL: @avx512_psllv_w_256_undef(
2625; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2626; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
2627;
2628  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
2629  %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
2630  ret <16 x i16> %2
2631}
2632
; All per-lane shift amounts are zero: the expected output returns %v unchanged.
2633define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
2634; CHECK-LABEL: @avx512_psllv_w_512_0(
2635; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
2636;
2637  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
2638  ret <32 x i16> %1
2639}
2640
; Every constant shift amount is within 0..15: the expected output is a generic shl with the same amounts.
2641define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
2642; CHECK-LABEL: @avx512_psllv_w_512_var(
2643; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2644; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2645;
2646  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2647  ret <32 x i16> %1
2648}
2649
; One lane's shift amount (16) exceeds 15 while the others are in range: the expected output keeps the intrinsic call.
2650define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
2651; CHECK-LABEL: @avx512_psllv_w_512_big(
2652; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2653; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2654;
2655  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2656  ret <32 x i16> %1
2657}
2658
; Every defined shift amount lies outside 0..15: the expected output is a constant with 0 in those lanes and undef where the amount was undef.
2659define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
2660; CHECK-LABEL: @avx512_psllv_w_512_allbig(
2661; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
2662;
2663  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
2664  ret <32 x i16> %1
2665}
2666
; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef: the expected output is a generic shl that keeps the undef lane.
2667define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
2668; CHECK-LABEL: @avx512_psllv_w_512_undef(
2669; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2670; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
2671;
2672  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
2673  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
2674  ret <32 x i16> %2
2675}
2676
2677;
2678; Vector Masked Shift Amounts
2679;
2680
; Shift-count vector %a is ANDed with a constant whose lane 0 is 15 and lanes 1-3 are 0: the expected output is a generic ashr by a splat of the masked lane 0.
2681define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) {
2682; CHECK-LABEL: @sse2_psra_w_128_masked(
2683; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2684; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
2685; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]]
2686; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
2687;
2688  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2689  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
2690  ret <8 x i16> %2
2691}
2692
; Shift-count vector %a is ANDed with a constant whose lane 0 is 31 and lane 1 is 0: the expected output is a generic ashr by a splat of the masked lane 0.
2693define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) {
2694; CHECK-LABEL: @avx2_psra_d_256_masked(
2695; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2696; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> zeroinitializer
2697; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]]
2698; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
2699;
2700  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2701  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
2702  ret <8 x i32> %2
2703}
2704
; Shift-count vector %a is ANDed with a constant whose lane 0 is 63: the expected output is a generic ashr by a splat of the masked lane 0.
2705define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) {
2706; CHECK-LABEL: @avx512_psra_q_512_masked(
2707; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2708; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> zeroinitializer
2709; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]]
2710; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
2711;
2712  %1 = and <2 x i64> %a, <i64 63, i64 undef>
2713  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
2714  ret <8 x i64> %2
2715}
2716
; Shift-count vector %a is ANDed with a constant whose lane 0 is 31 and lane 1 is 0: the expected output is a generic lshr by a splat of the masked lane 0.
2717define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
2718; CHECK-LABEL: @sse2_psrl_d_128_masked(
2719; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2720; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
2721; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]]
2722; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2723;
2724  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2725  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
2726  ret <4 x i32> %2
2727}
2728
; Shift-count vector %a is ANDed with a constant whose lane 0 is 63: the expected output is a generic lshr by a splat of the masked lane 0.
2729define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) {
2730; CHECK-LABEL: @avx2_psrl_q_256_masked(
2731; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2732; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> zeroinitializer
2733; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]]
2734; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
2735;
2736  %1 = and <2 x i64> %a, <i64 63, i64 undef>
2737  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
2738  ret <4 x i64> %2
2739}
2740
; Shift-count vector %a is ANDed with a constant whose lane 0 is 15 and lanes 1-3 are 0: the expected output is a generic lshr by a splat of the masked lane 0.
2741define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) {
2742; CHECK-LABEL: @avx512_psrl_w_512_masked(
2743; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2744; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <32 x i32> zeroinitializer
2745; CHECK-NEXT:    [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]]
2746; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
2747;
2748  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2749  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
2750  ret <32 x i16> %2
2751}
2752
; Shift-count vector %a is ANDed with a constant whose lane 0 is 63: the expected output is a generic shl by a splat of the masked lane 0.
2753define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
2754; CHECK-LABEL: @sse2_psll_q_128_masked(
2755; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2756; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
2757; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]]
2758; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
2759;
2760  %1 = and <2 x i64> %a, <i64 63, i64 undef>
2761  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
2762  ret <2 x i64> %2
2763}
2764
2765; The shift amount is in range (masked with 31 and high 32-bits are zero),
2766; so convert to standard IR - https://llvm.org/PR50123
2767
; The count is built through a <4 x i32> view: lane 0 is masked with 31 and lane 1 is set to 0 before bitcasting back to <2 x i64>; the expected output is a generic shl by a splat of the low i64.
2768define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
2769; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast(
2770; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
2771; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
2772; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i64 1
2773; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
2774; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer
2775; CHECK-NEXT:    [[R:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]]
2776; CHECK-NEXT:    ret <2 x i64> [[R]]
2777;
2778  %b = bitcast <2 x i64> %a to <4 x i32>
2779  %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
2780  %i = insertelement <4 x i32> %m, i32 0, i32 1
2781  %shamt = bitcast <4 x i32> %i to <2 x i64>
2782  %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
2783  ret <2 x i64> %r
2784}
2785
2786; TODO: This could be recognized as an over-shift.
2787
; Same construction as the in-range bitcast case, but lane 1 of the <4 x i32> view is set to 1 instead of 0; the expected output currently keeps the intrinsic call.
2788define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) {
2789; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift(
2790; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
2791; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
2792; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 1, i64 1
2793; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
2794; CHECK-NEXT:    [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]])
2795; CHECK-NEXT:    ret <2 x i64> [[R]]
2796;
2797  %b = bitcast <2 x i64> %a to <4 x i32>
2798  %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
2799  %i = insertelement <4 x i32> %m, i32 1, i32 1
2800  %shamt = bitcast <4 x i32> %i to <2 x i64>
2801  %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
2802  ret <2 x i64> %r
2803}
2804
; Shift-count vector %a is ANDed with a constant whose lane 0 is 15 and lanes 1-3 are 0: the expected output is a generic shl by a splat of the masked lane 0.
2805define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) {
2806; CHECK-LABEL: @avx2_psll_w_256_masked(
2807; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2808; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> zeroinitializer
2809; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]]
2810; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
2811;
2812  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2813  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
2814  ret <16 x i16> %2
2815}
2816
; Shift-count vector %a is ANDed with a constant whose lane 0 is 31 and lane 1 is 0: the expected output is a generic shl by a splat of the masked lane 0.
2817define <16 x i32> @avx512_psll_d_512_masked(<16 x i32> %v, <4 x i32> %a) {
2818; CHECK-LABEL: @avx512_psll_d_512_masked(
2819; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2820; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> zeroinitializer
2821; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]]
2822; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
2823;
2824  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2825  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
2826  ret <16 x i32> %2
2827}
2828
; Scalar shift count is %a & 15: the expected output truncates it to i16, splats it, and uses a generic ashr.
2829define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
2830; CHECK-LABEL: @sse2_psrai_w_128_masked(
2831; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2832; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
2833; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
2834; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
2835; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
2836; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
2837;
2838  %1 = and i32 %a, 15
2839  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 %1)
2840  ret <8 x i16> %2
2841}
2842
; Scalar shift count is %a & 31: the expected output splats it and uses a generic ashr.
2843define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
2844; CHECK-LABEL: @avx2_psrai_d_256_masked(
2845; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2846; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
2847; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
2848; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
2849; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
2850;
2851  %1 = and i32 %a, 31
2852  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 %1)
2853  ret <8 x i32> %2
2854}
2855
; Scalar shift count is %a & 63: the expected output zero-extends it to i64, splats it, and uses a generic ashr.
2856define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
2857; CHECK-LABEL: @avx512_psrai_q_512_masked(
2858; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2859; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2860; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i64 0
2861; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
2862; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
2863; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
2864;
2865  %1 = and i32 %a, 63
2866  %2 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 %1)
2867  ret <8 x i64> %2
2868}
2869
; Scalar shift count is %a & 31: the expected output splats it and uses a generic lshr.
2870define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
2871; CHECK-LABEL: @sse2_psrli_d_128_masked(
2872; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2873; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
2874; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2875; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
2876; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2877;
2878  %1 = and i32 %a, 31
2879  %2 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 %1)
2880  ret <4 x i32> %2
2881}
2882
; Scalar shift count is %a & 63: the expected output zero-extends it to i64, splats it, and uses a generic lshr.
2883define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
2884; CHECK-LABEL: @avx2_psrli_q_256_masked(
2885; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2886; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2887; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
2888; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
2889; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
2890; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
2891;
2892  %1 = and i32 %a, 63
2893  %2 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 %1)
2894  ret <4 x i64> %2
2895}
2896
; Scalar shift count is %a & 15: the expected output truncates it to i16, splats it, and uses a generic lshr.
2897define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
2898; CHECK-LABEL: @avx512_psrli_w_512_masked(
2899; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2900; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
2901; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i64 0
2902; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
2903; CHECK-NEXT:    [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
2904; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
2905;
2906  %1 = and i32 %a, 15
2907  %2 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 %1)
2908  ret <32 x i16> %2
2909}
2910
; Left-shift counterpart of the masked-immediate tests: the i32 amount is
; masked to 0..63, and the expected output is a generic shl by the
; zero-extended, splatted amount instead of the pslli.q intrinsic.
2911define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
2912; CHECK-LABEL: @sse2_pslli_q_128_masked(
2913; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2914; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2915; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
2916; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
2917; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
2918; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
2919;
2920  %1 = and i32 %a, 63
2921  %2 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 %1)
2922  ret <2 x i64> %2
2923}
2924
; The i32 shift amount is masked to 0..15 (below the 16-bit element width).
; The expected output truncates it to i16, masks, splats and uses a generic
; shl instead of the pslli.w intrinsic.
2925define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
2926; CHECK-LABEL: @avx2_pslli_w_256_masked(
2927; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2928; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
2929; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i64 0
2930; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
2931; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
2932; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
2933;
2934  %1 = and i32 %a, 15
2935  %2 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 %1)
2936  ret <16 x i16> %2
2937}
2938
; The shift amount is masked to 0..31 (below the 32-bit element width), so
; the expected output is a generic shl by the splatted amount instead of the
; pslli.d.512 intrinsic.
2939define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
2940; CHECK-LABEL: @avx512_pslli_d_512_masked(
2941; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2942; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i64 0
2943; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
2944; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
2945; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
2946;
2947  %1 = and i32 %a, 31
2948  %2 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 %1)
2949  ret <16 x i32> %2
2950}
2951
; Per-element shift amounts are all masked with 31, keeping every lane below
; the 32-bit element width; the expected output is a generic ashr by the
; masked vector instead of the psrav.d intrinsic.
2952define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
2953; CHECK-LABEL: @avx2_psrav_d_128_masked(
2954; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31)
2955; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]]
2956; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2957;
2958  %1 = and <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
2959  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
2960  ret <4 x i32> %2
2961}
2962
; Only lanes 2 and 3 of the mask constant are defined (15 and 31); the shuffle
; replicates exactly those two lanes into all four positions, so every shift
; amount reaching psrav.d is in range. The expected output keeps the and and
; the shuffle (with undef printed as poison) and uses a generic ashr.
2963define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) {
2964; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle(
2965; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 15, i32 31>
2966; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
2967; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]]
2968; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2969;
2970  %1 = and <4 x i32> %a, <i32 undef, i32 undef, i32 15, i32 31>
2971  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
2972  %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2)
2973  ret <4 x i32> %3
2974}
2975
; Each lane gets a different mask, all of them at most 31, so every shift
; amount is below the 32-bit element width; the expected output is a generic
; ashr by the masked vector instead of the psrav.d.256 intrinsic.
2976define <8 x i32> @avx2_psrav_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
2977; CHECK-LABEL: @avx2_psrav_d_256_masked(
2978; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
2979; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP1]]
2980; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
2981;
2982  %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
2983  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
2984  ret <8 x i32> %2
2985}
2986
; Per-lane masks cycle through 0..7, all below the 16-bit element width; the
; expected output is a generic ashr by the masked vector instead of the
; psrav.w.512 intrinsic.
2987define <32 x i16> @avx512_psrav_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
2988; CHECK-LABEL: @avx512_psrav_w_512_masked(
2989; CHECK-NEXT:    [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2990; CHECK-NEXT:    [[TMP2:%.*]] = ashr <32 x i16> [[V:%.*]], [[TMP1]]
2991; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
2992;
2993  %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2994  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
2995  ret <32 x i16> %2
2996}
2997
; Per-lane masks 32 and 63 keep both shift amounts below the 64-bit element
; width; the expected output is a generic lshr by the masked vector instead
; of the psrlv.q intrinsic.
2998define <2 x i64> @avx2_psrlv_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
2999; CHECK-LABEL: @avx2_psrlv_q_128_masked(
3000; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 32, i64 63>
3001; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[V:%.*]], [[TMP1]]
3002; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
3003;
3004  %1 = and <2 x i64> %a, <i64 32, i64 63>
3005  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
3006  ret <2 x i64> %2
3007}
3008
; Each lane gets a different mask, all of them at most 31 (below the 32-bit
; element width); the expected output is a generic lshr by the masked vector
; instead of the psrlv.d.256 intrinsic.
3009define <8 x i32> @avx2_psrlv_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
3010; CHECK-LABEL: @avx2_psrlv_d_256_masked(
3011; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
3012; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i32> [[V:%.*]], [[TMP1]]
3013; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
3014;
3015  %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
3016  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
3017  ret <8 x i32> %2
3018}
3019
; Each lane gets a different mask, all of them at most 63 (below the 64-bit
; element width); the expected output is a generic lshr by the masked vector
; instead of the psrlv.q.512 intrinsic.
3020define <8 x i64> @avx512_psrlv_q_512_masked(<8 x i64> %v, <8 x i64> %a) {
3021; CHECK-LABEL: @avx512_psrlv_q_512_masked(
3022; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i64> [[A:%.*]], <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
3023; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i64> [[V:%.*]], [[TMP1]]
3024; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
3025;
3026  %1 = and <8 x i64> %a, <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
3027  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
3028  ret <8 x i64> %2
3029}
3030
; Per-lane masks 0, 15, 16 and 31 keep every shift amount below the 32-bit
; element width; the expected output is a generic shl by the masked vector
; instead of the psllv.d intrinsic.
3031define <4 x i32> @avx2_psllv_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
3032; CHECK-LABEL: @avx2_psllv_d_128_masked(
3033; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 16, i32 31>
3034; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[V:%.*]], [[TMP1]]
3035; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3036;
3037  %1 = and <4 x i32> %a, <i32 0, i32 15, i32 16, i32 31>
3038  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
3039  ret <4 x i32> %2
3040}
3041
; Per-lane masks 0, 16, 32 and 63 keep every shift amount below the 64-bit
; element width; the expected output is a generic shl by the masked vector
; instead of the psllv.q.256 intrinsic.
3042define <4 x i64> @avx2_psllv_q_256_masked(<4 x i64> %v, <4 x i64> %a) {
3043; CHECK-LABEL: @avx2_psllv_q_256_masked(
3044; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i64> [[A:%.*]], <i64 0, i64 16, i64 32, i64 63>
3045; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[V:%.*]], [[TMP1]]
3046; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
3047;
3048  %1 = and <4 x i64> %a, <i64 0, i64 16, i64 32, i64 63>
3049  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
3050  ret <4 x i64> %2
3051}
3052
; Per-lane masks cycle through 0..7, all below the 16-bit element width; the
; expected output is a generic shl by the masked vector instead of the
; psllv.w.512 intrinsic.
3053define <32 x i16> @avx512_psllv_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
3054; CHECK-LABEL: @avx512_psllv_w_512_masked(
3055; CHECK-NEXT:    [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
3056; CHECK-NEXT:    [[TMP2:%.*]] = shl <32 x i16> [[V:%.*]], [[TMP1]]
3057; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
3058;
3059  %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
3060  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
3061  ret <32 x i16> %2
3062}
3063
3064;
3065; Vector Demanded Bits
3066;
3067
; The shuffle keeps lanes 0-3 of the count vector and overwrites lanes 4-7
; with a copy of them. The expected output drops the shuffle and passes %a
; straight to psra.w, i.e. the upper half of the count is not demanded.
3068define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
3069; CHECK-LABEL: @sse2_psra_w_var(
3070; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3071; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
3072;
3073  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3074  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
3075  ret <8 x i16> %2
3076}
3077
; Same as the non-bitcast variant, but the count arrives as <2 x i64> whose
; element 0 is duplicated into element 1 before a bitcast to <8 x i16>. The
; expected output removes the shuffle and keeps only the bitcast of %a.
3078define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
3079; CHECK-LABEL: @sse2_psra_w_var_bc(
3080; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
3081; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3082; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
3083;
3084  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3085  %2 = bitcast <2 x i64> %1 to <8 x i16>
3086  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
3087  ret <8 x i16> %3
3088}
3089
; The shuffle keeps lanes 0-1 of the count vector and overwrites lanes 2-3
; with a copy of them. The expected output drops the shuffle and passes %a
; straight to psra.d, i.e. the upper half of the count is not demanded.
3090define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
3091; CHECK-LABEL: @sse2_psra_d_var(
3092; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3093; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
3094;
3095  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3096  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
3097  ret <4 x i32> %2
3098}
3099
; The count arrives as <8 x i16>; the shuffle copies lanes 0-3 over lanes 4-7
; before a bitcast to <4 x i32>. The expected output removes the shuffle and
; keeps only the bitcast of %a feeding psra.d.
3100define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
3101; CHECK-LABEL: @sse2_psra_d_var_bc(
3102; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
3103; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3104; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3105;
3106  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3107  %2 = bitcast <8 x i16> %1 to <4 x i32>
3108  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
3109  ret <4 x i32> %3
3110}
3111
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; the avx2 psra.w intrinsic.
3112define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
3113; CHECK-LABEL: @avx2_psra_w_var(
3114; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3115; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
3116;
3117  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3118  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
3119  ret <16 x i16> %2
3120}
3121
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; the avx2 psra.d intrinsic.
3122define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
3123; CHECK-LABEL: @avx2_psra_d_var(
3124; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3125; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
3126;
3127  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3128  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
3129  ret <8 x i32> %2
3130}
3131
; The shuffle duplicates count element 0 into element 1. The expected output
; drops the shuffle and passes %a straight to psra.q.128, i.e. element 1 of
; the count is not demanded.
3132define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
3133; CHECK-LABEL: @avx512_psra_q_128_var(
3134; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3135; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
3136;
3137  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3138  %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
3139  ret <2 x i64> %2
3140}
3141
; 256-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to psra.q.256.
3142define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
3143; CHECK-LABEL: @avx512_psra_q_256_var(
3144; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3145; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
3146;
3147  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3148  %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
3149  ret <4 x i64> %2
3150}
3151
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; psra.w.512.
3152define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3153; CHECK-LABEL: @avx512_psra_w_512_var(
3154; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3155; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
3156;
3157  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3158  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
3159  ret <32 x i16> %2
3160}
3161
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; psra.d.512.
3162define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3163; CHECK-LABEL: @avx512_psra_d_512_var(
3164; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3165; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
3166;
3167  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3168  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
3169  ret <16 x i32> %2
3170}
3171
; 512-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to psra.q.512.
3172define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3173; CHECK-LABEL: @avx512_psra_q_512_var(
3174; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3175; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
3176;
3177  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3178  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
3179  ret <8 x i64> %2
3180}
3181
; Logical-right-shift counterpart: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; psrl.w, i.e. the upper half of the count is not demanded.
3182define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
3183; CHECK-LABEL: @sse2_psrl_w_var(
3184; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3185; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
3186;
3187  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3188  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
3189  ret <8 x i16> %2
3190}
3191
; The shuffle copies count lanes 0-1 over lanes 2-3. The expected output
; drops the shuffle and passes %a straight to psrl.d.
3192define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
3193; CHECK-LABEL: @sse2_psrl_d_var(
3194; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3195; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
3196;
3197  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3198  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
3199  ret <4 x i32> %2
3200}
3201
; The shuffle duplicates count element 0 into element 1. The expected output
; drops the shuffle and passes %a straight to psrl.q.
3202define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
3203; CHECK-LABEL: @sse2_psrl_q_var(
3204; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3205; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
3206;
3207  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3208  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
3209  ret <2 x i64> %2
3210}
3211
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; the avx2 psrl.w intrinsic.
3212define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
3213; CHECK-LABEL: @avx2_psrl_w_var(
3214; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3215; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
3216;
3217  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3218  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
3219  ret <16 x i16> %2
3220}
3221
; The count arrives as <16 x i8>; the shuffle copies bytes 0-7 over bytes
; 8-15 before a bitcast to <8 x i16>. The expected output removes the shuffle
; and keeps only the bitcast of %a feeding the avx2 psrl.w intrinsic.
3222define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
3223; CHECK-LABEL: @avx2_psrl_w_var_bc(
3224; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
3225; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3226; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
3227;
3228  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3229  %2 = bitcast <16 x i8> %1 to <8 x i16>
3230  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
3231  ret <16 x i16> %3
3232}
3233
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; the avx2 psrl.d intrinsic.
3234define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
3235; CHECK-LABEL: @avx2_psrl_d_var(
3236; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3237; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
3238;
3239  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3240  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
3241  ret <8 x i32> %2
3242}
3243
; The count arrives as <2 x i64>; the shuffle duplicates element 0 into
; element 1 before a bitcast to <4 x i32>. The expected output removes the
; shuffle and keeps only the bitcast of %a feeding the avx2 psrl.d intrinsic.
3244define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
3245; CHECK-LABEL: @avx2_psrl_d_var_bc(
3246; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
3247; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3248; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
3249;
3250  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3251  %2 = bitcast <2 x i64> %1 to <4 x i32>
3252  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
3253  ret <8 x i32> %3
3254}
3255
; 256-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to the avx2 psrl.q intrinsic.
3256define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
3257; CHECK-LABEL: @avx2_psrl_q_var(
3258; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3259; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
3260;
3261  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3262  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
3263  ret <4 x i64> %2
3264}
3265
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; psrl.w.512.
3266define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3267; CHECK-LABEL: @avx512_psrl_w_512_var(
3268; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3269; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
3270;
3271  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3272  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
3273  ret <32 x i16> %2
3274}
3275
; The count arrives as <16 x i8>; the shuffle copies bytes 0-7 over bytes
; 8-15 before a bitcast to <8 x i16>. The expected output removes the shuffle
; and keeps only the bitcast of %a feeding psrl.w.512.
3276define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
3277; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
3278; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
3279; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3280; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
3281;
3282  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3283  %2 = bitcast <16 x i8> %1 to <8 x i16>
3284  %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
3285  ret <32 x i16> %3
3286}
3287
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; psrl.d.512.
3288define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3289; CHECK-LABEL: @avx512_psrl_d_512_var(
3290; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3291; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
3292;
3293  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3294  %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
3295  ret <16 x i32> %2
3296}
3297
; The count arrives as <2 x i64>; the shuffle duplicates element 0 into
; element 1 before a bitcast to <4 x i32>. The expected output removes the
; shuffle and keeps only the bitcast of %a feeding psrl.d.512.
3298define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
3299; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
3300; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
3301; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3302; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
3303;
3304  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3305  %2 = bitcast <2 x i64> %1 to <4 x i32>
3306  %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
3307  ret <16 x i32> %3
3308}
3309
; 512-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to psrl.q.512.
3310define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3311; CHECK-LABEL: @avx512_psrl_q_512_var(
3312; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3313; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
3314;
3315  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3316  %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
3317  ret <8 x i64> %2
3318}
3319
; Left-shift counterpart: the shuffle copies count lanes 0-3 over lanes 4-7.
; The expected output drops the shuffle and passes %a straight to psll.w,
; i.e. the upper half of the count is not demanded.
3320define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
3321; CHECK-LABEL: @sse2_psll_w_var(
3322; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3323; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
3324;
3325  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3326  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
3327  ret <8 x i16> %2
3328}
3329
; The shuffle copies count lanes 0-1 over lanes 2-3. The expected output
; drops the shuffle and passes %a straight to psll.d.
3330define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
3331; CHECK-LABEL: @sse2_psll_d_var(
3332; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3333; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
3334;
3335  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3336  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
3337  ret <4 x i32> %2
3338}
3339
; The shuffle duplicates count element 0 into element 1. The expected output
; drops the shuffle and passes %a straight to psll.q.
3340define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
3341; CHECK-LABEL: @sse2_psll_q_var(
3342; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3343; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
3344;
3345  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3346  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
3347  ret <2 x i64> %2
3348}
3349
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; the avx2 psll.w intrinsic.
3350define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
3351; CHECK-LABEL: @avx2_psll_w_var(
3352; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3353; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
3354;
3355  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3356  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
3357  ret <16 x i16> %2
3358}
3359
; 256-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; the avx2 psll.d intrinsic.
3360define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
3361; CHECK-LABEL: @avx2_psll_d_var(
3362; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3363; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
3364;
3365  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3366  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
3367  ret <8 x i32> %2
3368}
3369
; 256-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to the avx2 psll.q intrinsic.
3370define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
3371; CHECK-LABEL: @avx2_psll_q_var(
3372; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3373; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
3374;
3375  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3376  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
3377  ret <4 x i64> %2
3378}
3379
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-3 over
; lanes 4-7. The expected output drops the shuffle and passes %a straight to
; psll.w.512.
3380define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3381; CHECK-LABEL: @avx512_psll_w_512_var(
3382; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3383; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
3384;
3385  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3386  %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
3387  ret <32 x i16> %2
3388}
3389
; 512-bit value with a 128-bit count: the shuffle copies count lanes 0-1 over
; lanes 2-3. The expected output drops the shuffle and passes %a straight to
; psll.d.512.
3390define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3391; CHECK-LABEL: @avx512_psll_d_512_var(
3392; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3393; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
3394;
3395  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3396  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
3397  ret <16 x i32> %2
3398}
3399
; 512-bit value with a 128-bit count: the shuffle duplicates count element 0
; into element 1. The expected output drops the shuffle and passes %a
; straight to psll.q.512.
3400define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3401; CHECK-LABEL: @avx512_psll_q_512_var(
3402; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3403; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
3404;
3405  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3406  %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
3407  ret <8 x i64> %2
3408}
3409
3410;
3411; Constant Folding
3412;
3413
; Chain of three arithmetic shifts that should all be no-ops: psrai.w by 0,
; psra.w by a count vector whose lanes 0-3 are zero (the only non-zero lane,
; 7, is lane 4 in the upper half), and psrai.w by 0 again. The expected
; output returns %A unchanged.
3414define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
3415; CHECK-LABEL: @test_sse2_psra_w_0(
3416; CHECK-NEXT:    ret <8 x i16> [[A:%.*]]
3417;
3418  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
3419  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3420  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
3421  ret <8 x i16> %3
3422}
3423
; Fully constant chain: the i64 constant 0x1000200040008000, viewed as i16
; lanes, is shifted right arithmetically by 3 (immediate), by the count
; vector (lanes 0-3 hold 3,0,0,0; the 7 in lane 4 sits in the upper half),
; and by 2 (immediate). The expected output is the folded constant, which
; equals each input lane shifted right by 8 in total.
3424define <8 x i16> @test_sse2_psra_w_8() {
3425; CHECK-LABEL: @test_sse2_psra_w_8(
3426; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3427;
3428  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
3429  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
3430  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3431  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
3432  ret <8 x i16> %4
3433}
3434
; Chain of three arithmetic shifts that should all be no-ops: psrai.d by 0,
; psra.d by a count vector whose lanes 0-1 are zero (the only non-zero lane,
; 7, is lane 2 in the upper half), and psrai.d by 0 again. Each call consumes
; the previous result so that the vector-count psra.d is actually part of the
; folded chain; the expected output returns %A unchanged.
3435define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
3436; CHECK-LABEL: @test_sse2_psra_d_0(
3437; CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
3438;
3439  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
3440  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
3441  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
3442  ret <4 x i32> %3
3443}
3444
; Fully constant chain: the i64 constant 0x1000200040008000, viewed as i32
; lanes, is shifted right arithmetically by 3 (immediate), by the count
; vector (lanes 0-1 hold 3,0; the 7 in lane 2 sits in the upper half), and by
; 2 (immediate). The expected output is the folded constant, which equals
; each input lane shifted right by 8 in total.
; NOTE(review): sibling tests in this section carry a test_ prefix; this one
; does not.
3445define <4 x i32> @sse2_psra_d_8() {
3446; CHECK-LABEL: @sse2_psra_d_8(
3447; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
3448;
3449  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
3450  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
3451  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
3452  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
3453  ret <4 x i32> %4
3454}
3455
; 256-bit version of the zero-shift chain: psrai.w by 0, psra.w by a count
; vector whose lanes 0-3 are zero (the 7 is in lane 4, the upper half), and
; psrai.w by 0 again. The expected output returns %A unchanged.
3456define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
3457; CHECK-LABEL: @test_avx2_psra_w_0(
3458; CHECK-NEXT:    ret <16 x i16> [[A:%.*]]
3459;
3460  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
3461  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3462  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
3463  ret <16 x i16> %3
3464}
3465
; 256-bit fully constant chain on i16 lanes of 0x1000200040008000: shifts of
; 3 (immediate), 3 (count vector lanes 0-3 hold 3,0,0,0; the 7 is in the
; upper half) and 2 (immediate). The expected output is the folded constant,
; each lane shifted right by 8 in total.
; NOTE(review): the %A parameter is not used by the body.
3466define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
3467; CHECK-LABEL: @test_avx2_psra_w_8(
3468; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3469;
3470  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
3471  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
3472  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3473  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
3474  ret <16 x i16> %4
3475}
3476
; 256-bit version of the zero-shift chain on i32 lanes: psrai.d by 0, psra.d
; by a count vector whose lanes 0-1 are zero (the 7 is in lane 2, the upper
; half), and psrai.d by 0 again. The expected output returns %A unchanged.
3477define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
3478; CHECK-LABEL: @test_avx2_psra_d_0(
3479; CHECK-NEXT:    ret <8 x i32> [[A:%.*]]
3480;
3481  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
3482  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
3483  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
3484  ret <8 x i32> %3
3485}
3486
; 256-bit fully constant chain on i32 lanes of 0x1000200040008000: shifts of
; 3 (immediate), 3 (count vector lanes 0-1 hold 3,0; the 7 is in the upper
; half) and 2 (immediate). The expected output is the folded constant, each
; lane shifted right by 8 in total.
3487define <8 x i32> @test_avx2_psra_d_8() {
3488; CHECK-LABEL: @test_avx2_psra_d_8(
3489; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
3490;
3491  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
3492  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
3493  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
3494  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
3495  ret <8 x i32> %4
3496}
3497
; 512-bit version of the zero-shift chain on i16 lanes: psrai.w.512 by 0,
; psra.w.512 by a count vector whose lanes 0-3 are zero (the 7 is in lane 4,
; the upper half), and psrai.w.512 by 0 again. The expected output returns
; %A unchanged.
3498define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
3499; CHECK-LABEL: @test_avx512_psra_w_512_0(
3500; CHECK-NEXT:    ret <32 x i16> [[A:%.*]]
3501;
3502  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
3503  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3504  %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
3505  ret <32 x i16> %3
3506}
3507
; 512-bit fully constant chain on i16 lanes of 0x1000200040008000: shifts of
; 3 (immediate), 3 (count vector lanes 0-3 hold 3,0,0,0; the 7 is in the
; upper half) and 2 (immediate). The expected output is the folded constant,
; each lane shifted right by 8 in total.
; NOTE(review): the %A parameter is not used by the body.
3508define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
3509; CHECK-LABEL: @test_avx512_psra_w_512_8(
3510; CHECK-NEXT:    ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3511;
3512  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
3513  %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
3514  %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3515  %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
3516  ret <32 x i16> %4
3517}
3518
; Chain of arithmetic right shifts on <16 x i32>: immediate count 0, then a
; vector count, then immediate count 0 again. The autogenerated assertion
; expects the chain to fold to the unmodified argument, so the 7 stored in
; element 2 of the count vector must not contribute to the shift amount.
define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
; CHECK-LABEL: @test_avx512_psra_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[A:%.*]]
;
  %imm.first = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
  %vec.count = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %imm.first, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %imm.last = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %vec.count, i32 0)
  ret <16 x i32> %imm.last
}
3528
; Constant-folding test for arithmetic right shifts on <16 x i32>.
; The input is the i64 pattern 0x1000200040008000 repeated eight times, i.e.
; the i32 lanes 0x40008000, 0x10002000 on this little-endian layout. It is
; shifted by immediate 3, by a count vector whose low element is 3, and by
; immediate 2. The expected lanes (4194432, 1048608) are the inputs shifted
; right by 8 = 3 + 3 + 2, so the 7 in element 2 of the count vector does not
; contribute.
define <16 x i32> @test_avx512_psra_d_512_8() {
; CHECK-LABEL: @test_avx512_psra_d_512_8(
; CHECK-NEXT:    ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %src = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
  %sra.imm3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %src, i32 3)
  %sra.vec = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %sra.imm3, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %sra.imm2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %sra.vec, i32 2)
  ret <16 x i32> %sra.imm2
}
3539
3540;
3541; Old Tests
3542;
3543
; SSE2 left-shift chain with a count of 1.
; The count vector is built as <i64 1, i64 0> and reinterpreted at each element
; width for the vector-count forms (psll.w, psll.d, psll.q); the immediate
; forms (pslli.w, pslli.d, pslli.q) take %S directly. The autogenerated
; assertion expects the six shifts to constant-fold into a single vector.
define <2 x i64> @test_sse2_1() {
; CHECK-LABEL: @test_sse2_1(
; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
;
  %S = bitcast i32 1 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %sll.w = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %amt.w)
  %sll.w.as.d = bitcast <8 x i16> %sll.w to <4 x i32>
  %sll.d = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %sll.w.as.d, <4 x i32> %amt.d)
  %sll.d.as.q = bitcast <4 x i32> %sll.d to <2 x i64>
  %sll.q = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %sll.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %sll.q.as.w = bitcast <2 x i64> %sll.q to <8 x i16>
  %slli.w = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %sll.q.as.w, i32 %S)
  %slli.w.as.d = bitcast <8 x i16> %slli.w to <4 x i32>
  %slli.d = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %slli.w.as.d, i32 %S)
  %slli.d.as.q = bitcast <4 x i32> %slli.d to <2 x i64>
  %slli.q = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %slli.d.as.q, i32 %S)
  ret <2 x i64> %slli.q
}
3567
; AVX2 left-shift chain with a count of 1.
; Viewed as i64 lanes on this little-endian layout the input is 1, 2, 3, 4.
; The expected result (64, 128, 192, 256) is each lane times 64, i.e. six
; one-bit left shifts: three vector-count forms followed by three immediate
; forms. The count vector is <i64 1, i64 0>, reinterpreted per element width.
define <4 x i64> @test_avx2_1() {
; CHECK-LABEL: @test_avx2_1(
; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
;
  %S = bitcast i32 1 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %sll.w = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %amt.w)
  %sll.w.as.d = bitcast <16 x i16> %sll.w to <8 x i32>
  %sll.d = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %sll.w.as.d, <4 x i32> %amt.d)
  %sll.d.as.q = bitcast <8 x i32> %sll.d to <4 x i64>
  %sll.q = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %sll.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %sll.q.as.w = bitcast <4 x i64> %sll.q to <16 x i16>
  %slli.w = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %sll.q.as.w, i32 %S)
  %slli.w.as.d = bitcast <16 x i16> %slli.w to <8 x i32>
  %slli.d = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %slli.w.as.d, i32 %S)
  %slli.d.as.q = bitcast <8 x i32> %slli.d to <4 x i64>
  %slli.q = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %slli.d.as.q, i32 %S)
  ret <4 x i64> %slli.q
}
3591
; SSE2 left-shift chain with a count of 128.
; Same structure as the count-1 variant, but %S is 128 and the count vector is
; <i64 128, i64 0>. The autogenerated assertion expects the whole chain to
; constant-fold to an all-zero vector.
define <2 x i64> @test_sse2_0() {
; CHECK-LABEL: @test_sse2_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %sll.w = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %amt.w)
  %sll.w.as.d = bitcast <8 x i16> %sll.w to <4 x i32>
  %sll.d = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %sll.w.as.d, <4 x i32> %amt.d)
  %sll.d.as.q = bitcast <4 x i32> %sll.d to <2 x i64>
  %sll.q = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %sll.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %sll.q.as.w = bitcast <2 x i64> %sll.q to <8 x i16>
  %slli.w = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %sll.q.as.w, i32 %S)
  %slli.w.as.d = bitcast <8 x i16> %slli.w to <4 x i32>
  %slli.d = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %slli.w.as.d, i32 %S)
  %slli.d.as.q = bitcast <4 x i32> %slli.d to <2 x i64>
  %slli.q = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %slli.d.as.q, i32 %S)
  ret <2 x i64> %slli.q
}
3615
; AVX2 left-shift chain with a count of 128.
; Same structure as the count-1 variant, but %S is 128 and the count vector is
; <i64 128, i64 0>. The autogenerated assertion expects the whole chain to
; constant-fold to an all-zero vector.
define <4 x i64> @test_avx2_0() {
; CHECK-LABEL: @test_avx2_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %sll.w = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %amt.w)
  %sll.w.as.d = bitcast <16 x i16> %sll.w to <8 x i32>
  %sll.d = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %sll.w.as.d, <4 x i32> %amt.d)
  %sll.d.as.q = bitcast <8 x i32> %sll.d to <4 x i64>
  %sll.q = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %sll.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %sll.q.as.w = bitcast <4 x i64> %sll.q to <16 x i16>
  %slli.w = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %sll.q.as.w, i32 %S)
  %slli.w.as.d = bitcast <16 x i16> %slli.w to <8 x i32>
  %slli.d = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %slli.w.as.d, i32 %S)
  %slli.d.as.q = bitcast <8 x i32> %slli.d to <4 x i64>
  %slli.q = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %slli.d.as.q, i32 %S)
  ret <4 x i64> %slli.q
}
; SSE2 logical right-shift chain with a count of 1.
; The count vector is built as <i64 1, i64 0> and reinterpreted at each element
; width for the vector-count forms (psrl.w, psrl.d, psrl.q); the immediate
; forms (psrli.w, psrli.d, psrli.q) take %S directly. The autogenerated
; assertion expects the six shifts to constant-fold into a single vector.
define <2 x i64> @test_sse2_psrl_1() {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
;
  %S = bitcast i32 1 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %srl.w = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %amt.w)
  %srl.w.as.d = bitcast <8 x i16> %srl.w to <4 x i32>
  %srl.d = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %srl.w.as.d, <4 x i32> %amt.d)
  %srl.d.as.q = bitcast <4 x i32> %srl.d to <2 x i64>
  %srl.q = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %srl.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %srl.q.as.w = bitcast <2 x i64> %srl.q to <8 x i16>
  %srli.w = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %srl.q.as.w, i32 %S)
  %srli.w.as.d = bitcast <8 x i16> %srli.w to <4 x i32>
  %srli.d = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %srli.w.as.d, i32 %S)
  %srli.d.as.q = bitcast <4 x i32> %srli.d to <2 x i64>
  %srli.q = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %srli.d.as.q, i32 %S)
  ret <2 x i64> %srli.q
}
3662
; AVX2 logical right-shift chain with a count of 1.
; Viewed as i64 lanes on this little-endian layout the input is 1024, 2048,
; 4096, 8192. The expected result (16, 32, 64, 128) is each lane divided by 64,
; i.e. six one-bit right shifts: three vector-count forms followed by three
; immediate forms. The count vector is <i64 1, i64 0>, reinterpreted per
; element width.
define <4 x i64> @test_avx2_psrl_1() {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
;
  %S = bitcast i32 1 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %srl.w = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %amt.w)
  %srl.w.as.d = bitcast <16 x i16> %srl.w to <8 x i32>
  %srl.d = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %srl.w.as.d, <4 x i32> %amt.d)
  %srl.d.as.q = bitcast <8 x i32> %srl.d to <4 x i64>
  %srl.q = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %srl.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %srl.q.as.w = bitcast <4 x i64> %srl.q to <16 x i16>
  %srli.w = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %srl.q.as.w, i32 %S)
  %srli.w.as.d = bitcast <16 x i16> %srli.w to <8 x i32>
  %srli.d = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %srli.w.as.d, i32 %S)
  %srli.d.as.q = bitcast <8 x i32> %srli.d to <4 x i64>
  %srli.q = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %srli.d.as.q, i32 %S)
  ret <4 x i64> %srli.q
}
3686
; SSE2 logical right-shift chain with a count of 128.
; Same structure as the count-1 variant, but %S is 128 and the count vector is
; <i64 128, i64 0>. The autogenerated assertion expects the whole chain to
; constant-fold to an all-zero vector.
define <2 x i64> @test_sse2_psrl_0() {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %srl.w = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %amt.w)
  %srl.w.as.d = bitcast <8 x i16> %srl.w to <4 x i32>
  %srl.d = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %srl.w.as.d, <4 x i32> %amt.d)
  %srl.d.as.q = bitcast <4 x i32> %srl.d to <2 x i64>
  %srl.q = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %srl.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %srl.q.as.w = bitcast <2 x i64> %srl.q to <8 x i16>
  %srli.w = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %srl.q.as.w, i32 %S)
  %srli.w.as.d = bitcast <8 x i16> %srli.w to <4 x i32>
  %srli.d = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %srli.w.as.d, i32 %S)
  %srli.d.as.q = bitcast <4 x i32> %srli.d to <2 x i64>
  %srli.q = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %srli.d.as.q, i32 %S)
  ret <2 x i64> %srli.q
}
3710
; AVX2 logical right-shift chain with a count of 128.
; Same structure as the count-1 variant, but %S is 128 and the count vector is
; <i64 128, i64 0>. The autogenerated assertion expects the whole chain to
; constant-fold to an all-zero vector.
define <4 x i64> @test_avx2_psrl_0() {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %amt.ext = zext i32 %S to i64
  %amt.lo = insertelement <2 x i64> undef, i64 %amt.ext, i32 0
  %amt.q = insertelement <2 x i64> %amt.lo, i64 0, i32 1
  ; Reinterpret the same 128-bit count for the word and dword forms.
  %amt.w = bitcast <2 x i64> %amt.q to <8 x i16>
  %amt.d = bitcast <2 x i64> %amt.q to <4 x i32>
  ; Vector-count shifts: word, dword, qword.
  %srl.w = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %amt.w)
  %srl.w.as.d = bitcast <16 x i16> %srl.w to <8 x i32>
  %srl.d = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %srl.w.as.d, <4 x i32> %amt.d)
  %srl.d.as.q = bitcast <8 x i32> %srl.d to <4 x i64>
  %srl.q = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %srl.d.as.q, <2 x i64> %amt.q)
  ; Immediate-count shifts: word, dword, qword.
  %srl.q.as.w = bitcast <4 x i64> %srl.q to <16 x i16>
  %srli.w = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %srl.q.as.w, i32 %S)
  %srli.w.as.d = bitcast <16 x i16> %srli.w to <8 x i32>
  %srli.d = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %srli.w.as.d, i32 %S)
  %srli.d.as.q = bitcast <8 x i32> %srli.d to <4 x i64>
  %srli.q = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %srli.d.as.q, i32 %S)
  ret <4 x i64> %srli.q
}
3734
; Left-shift intrinsics. The "pslli" forms take the count as an i32 operand;
; the "psll" forms take it as a 128-bit vector operand. Grouped by vector
; width, narrowest first.

; 128-bit (sse2)
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1

; 256-bit (avx2)
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1

; 512-bit (avx512)
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
3753
; Logical right-shift intrinsics. The "psrli" forms take the count as an i32
; operand; the "psrl" forms take it as a 128-bit vector operand. Grouped by
; vector width, narrowest first.

; 128-bit (sse2)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1

; 256-bit (avx2)
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1

; 512-bit (avx512)
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
3772
; Arithmetic right-shift intrinsics. The "psrai" forms take the count as an
; i32 operand; the "psra" forms take it as a 128-bit vector operand. Grouped by
; vector width, narrowest first. Note that the i64-element forms are named
; under avx512 at every width (.128, .256, .512).

; 128-bit
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1

; 256-bit
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1

; 512-bit
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
3791
; Variable-shift intrinsics: the second operand has the same vector type as
; the first. Grouped by operation, then by element width (w, d, q).

; psrav
declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1

; psrlv
declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1

; psllv
declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1

; Attribute group referenced as #1 by the intrinsic declarations in this file.
attributes #1 = { nounwind readnone }
3824