; Source: llvm/test/CodeGen/X86/avx2-shift.ll (revision f6ff2cc7e0ae4fd9b14583a998ddeada256a954f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

; Per-element variable shift counts on v4i32 should select AVX2 vpsllvd.
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_shl0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

; 256-bit version: variable v8i32 shift counts should select ymm vpsllvd.
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_shl1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

; Variable v2i64 shift counts should select vpsllvq.
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: variable_shl2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

; 256-bit version: variable v4i64 shift counts should select ymm vpsllvq.
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: variable_shl3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

; Variable v4i32 logical right shifts should select vpsrlvd.
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_srl0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; 256-bit version: variable v8i32 logical right shifts should select ymm vpsrlvd.
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_srl1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

; Variable v2i64 logical right shifts should select vpsrlvq.
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: variable_srl2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

; 256-bit version: variable v4i64 logical right shifts should select ymm vpsrlvq.
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: variable_srl3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

; Variable v4i32 arithmetic right shifts should select vpsravd.
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: variable_sra0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; 256-bit version: variable v8i32 arithmetic right shifts should select ymm vpsravd.
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: variable_sra1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left

; Uniform-constant v8i32 shift amounts should select the immediate form vpslld.
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Uniform-constant v16i16 shift amounts should select the immediate form vpsllw.
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Uniform-constant v4i64 shift amounts should select the immediate form vpsllq.
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; CHECK-LABEL: vshift02:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right

; Uniform-constant v8i32 logical right shifts should select the immediate form vpsrld.
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift03:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Uniform-constant v16i16 logical right shifts should select the immediate form vpsrlw.
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift04:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Uniform-constant v4i64 logical right shifts should select the immediate form vpsrlq.
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; CHECK-LABEL: vshift05:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right

; Uniform-constant v8i32 arithmetic right shifts should select the immediate form vpsrad.
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; CHECK-LABEL: vshift06:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Uniform-constant v16i16 arithmetic right shifts should select the immediate form vpsraw.
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; CHECK-LABEL: vshift07:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; The loaded shift-count vector should be folded into vpsravd's memory operand.
define <4 x i32> @variable_sra0_load(<4 x i32> %x, ptr %y) {
; X86-LABEL: variable_sra0_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsravd (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_sra0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, ptr %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; 256-bit version: the loaded counts fold into ymm vpsravd's memory operand.
define <8 x i32> @variable_sra1_load(<8 x i32> %x, ptr %y) {
; X86-LABEL: variable_sra1_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsravd (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_sra1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, ptr %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; The loaded shift-count vector should be folded into vpsllvd's memory operand.
define <4 x i32> @variable_shl0_load(<4 x i32> %x, ptr %y) {
; X86-LABEL: variable_shl0_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllvd (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_shl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, ptr %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; 256-bit version: the loaded counts fold into ymm vpsllvd's memory operand.
define <8 x i32> @variable_shl1_load(<8 x i32> %x, ptr %y) {
; X86-LABEL: variable_shl1_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllvd (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_shl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, ptr %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; The loaded v2i64 counts should be folded into vpsllvq's memory operand.
define <2 x i64> @variable_shl2_load(<2 x i64> %x, ptr %y) {
; X86-LABEL: variable_shl2_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllvq (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_shl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, ptr %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; 256-bit version: the loaded counts fold into ymm vpsllvq's memory operand.
define <4 x i64> @variable_shl3_load(<4 x i64> %x, ptr %y) {
; X86-LABEL: variable_shl3_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllvq (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_shl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, ptr %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; The loaded shift-count vector should be folded into vpsrlvd's memory operand.
define <4 x i32> @variable_srl0_load(<4 x i32> %x, ptr %y) {
; X86-LABEL: variable_srl0_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsrlvd (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_srl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, ptr %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; 256-bit version: the loaded counts fold into ymm vpsrlvd's memory operand.
define <8 x i32> @variable_srl1_load(<8 x i32> %x, ptr %y) {
; X86-LABEL: variable_srl1_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsrlvd (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_srl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, ptr %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; The loaded v2i64 counts should be folded into vpsrlvq's memory operand.
define <2 x i64> @variable_srl2_load(<2 x i64> %x, ptr %y) {
; X86-LABEL: variable_srl2_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsrlvq (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_srl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, ptr %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; 256-bit version: the loaded counts fold into ymm vpsrlvq's memory operand.
define <4 x i64> @variable_srl3_load(<4 x i64> %x, ptr %y) {
; X86-LABEL: variable_srl3_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsrlvq (%eax), %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: variable_srl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, ptr %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; AVX2 has no byte shifts; a v32i8 shl is lowered as a word shift plus a mask.
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; X86-LABEL: shl9:
; X86:       # %bb.0:
; X86-NEXT:    vpsllw $3, %ymm0, %ymm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

; AVX2 has no byte shifts; a v32i8 lshr is lowered as a word shift plus a mask.
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; X86-LABEL: shr9:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

; ashr by 7 broadcasts each sign bit, so it lowers to a compare against zero.
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; CHECK-LABEL: sra_v32i8_7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

; v32i8 ashr lowers to a logical shift plus mask, then xor/sub to restore the sign bits.
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; X86-LABEL: sra_v32i8:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X86-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: sra_v32i8:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

; trunc+sext within the same lane width folds to an in-register shl/ashr pair.
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $8, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $8, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

; trunc+sext within the same lane width folds to an in-register shl/ashr pair.
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpsrad $16, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

; No v8i16 variable shift exists; widen to v8i32, vpsllvd, then truncate back.
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8  x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

; No v8i16 variable shift exists; sign-extend to v8i32, vpsravd, then pack back.
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8  x i16> %rhs) {
; CHECK-LABEL: variable_ashr16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

; No v8i16 variable shift exists; zero-extend to v8i32, vpsrlvd, then pack back.
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8  x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}
