; xref: /llvm-project/llvm/test/CodeGen/X86/shuffle-of-shift.ll (revision c3bf6d20ac306b829dc99939b3a8f9487f7f1c9a)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,X64,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2,X64,X64-AVX2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2,X86,X86-AVX2

;------------------------------ 32-bit shuffles -------------------------------;

; Shift each i16 lane left by 15 with llvm.x86.sse2.pslli.w, reinterpret the
; result as <4 x i32>, and reverse the four i32 elements. The assertions expect
; the shuffle to be emitted first and the word shift after it.
define <4 x i32> @shuffle_i32_of_shl_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_shl_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    psllw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_shl_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}
; Logically shift each i16 lane right by 15 with llvm.x86.sse2.psrli.w,
; reinterpret the result as <4 x i32>, and reverse the four i32 elements. The
; assertions expect the shuffle to be emitted first and the word shift after it.
define <4 x i32> @shuffle_i32_of_lshr_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_lshr_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    psrlw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_lshr_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}
; Arithmetically shift each i16 lane right by 15 with llvm.x86.sse2.psrai.w,
; reinterpret the result as <4 x i32>, and reverse the four i32 elements. The
; assertions expect the shuffle to be emitted first and the word shift after it.
define <4 x i32> @shuffle_i32_of_ashr_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_ashr_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    psraw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_ashr_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}

; Shift each i32 lane left by 31 with llvm.x86.sse2.pslli.d and reverse the
; four i32 elements. The assertions expect the shuffle to be emitted first and
; the doubleword shift after it.
define <4 x i32> @shuffle_i32_of_shl_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_shl_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_shl_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %x, i32 31)
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i2
}
; Logically shift each i32 lane right by 31 with llvm.x86.sse2.psrli.d and
; reverse the four i32 elements. The assertions expect the shuffle to be
; emitted first and the doubleword shift after it.
define <4 x i32> @shuffle_i32_of_lshr_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_lshr_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_lshr_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %x, i32 31)
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i2
}
; Arithmetically shift each i32 lane right by 31 with llvm.x86.sse2.psrai.d and
; reverse the four i32 elements. The assertions expect the shuffle to be
; emitted first and the doubleword shift after it.
define <4 x i32> @shuffle_i32_of_ashr_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_ashr_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_ashr_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %x, i32 31)
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i2
}

; Shift each i64 lane left by 63 with llvm.x86.sse2.pslli.q, reinterpret the
; result as <4 x i32>, and reverse the four i32 elements. Here the assertions
; expect the quadword shift to stay ahead of the shuffle.
define <4 x i32> @shuffle_i32_of_shl_i64(<2 x i64> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_shl_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_shl_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}
; Logically shift each i64 lane right by 63 with llvm.x86.sse2.psrli.q,
; reinterpret the result as <4 x i32>, and reverse the four i32 elements. Here
; the assertions expect the quadword shift to stay ahead of the shuffle.
define <4 x i32> @shuffle_i32_of_lshr_i64(<2 x i64> %x) nounwind {
; SSE2-LABEL: shuffle_i32_of_lshr_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $63, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i32_of_lshr_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}
; Call llvm.x86.sse2.psrai.q with a count of 63, reinterpret the result as
; <4 x i32>, and reverse the four i32 elements. The declaration of that symbol
; in this file is marked "does not exist", and the assertions expect an
; ordinary call to it followed by the shuffle (pshufd with SSE2, vshufps with
; AVX2). x86-64 and i686 have separate expectations because the call sequence
; differs.
define <4 x i32> @shuffle_i32_of_ashr_i64(<2 x i64> %x) nounwind {
; X64-SSE2-LABEL: shuffle_i32_of_ashr_i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pushq %rax
; X64-SSE2-NEXT:    movl $63, %edi
; X64-SSE2-NEXT:    callq llvm.x86.sse2.psrai.q@PLT
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-SSE2-NEXT:    popq %rax
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: shuffle_i32_of_ashr_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    pushq %rax
; X64-AVX2-NEXT:    movl $63, %edi
; X64-AVX2-NEXT:    callq llvm.x86.sse2.psrai.q@PLT
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-AVX2-NEXT:    popq %rax
; X64-AVX2-NEXT:    retq
;
; X86-SSE2-LABEL: shuffle_i32_of_ashr_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl $63
; X86-SSE2-NEXT:    calll llvm.x86.sse2.psrai.q@PLT
; X86-SSE2-NEXT:    addl $4, %esp
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-SSE2-NEXT:    retl
;
; X86-AVX2-LABEL: shuffle_i32_of_ashr_i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    pushl $63
; X86-AVX2-NEXT:    calll llvm.x86.sse2.psrai.q@PLT
; X86-AVX2-NEXT:    addl $4, %esp
; X86-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-AVX2-NEXT:    retl
  %i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <4 x i32>
  %i3 = shufflevector <4 x i32> %i2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %i3
}

;------------------------------ 64-bit shuffles -------------------------------;

; Shift each i16 lane left by 15 with llvm.x86.sse2.pslli.w, reinterpret the
; result as <2 x i64>, and swap the two i64 elements. The assertions expect
; the shuffle to be emitted first and the word shift after it.
define <2 x i64> @shuffle_i64_of_shl_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_shl_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    psllw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_shl_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Logically shift each i16 lane right by 15 with llvm.x86.sse2.psrli.w,
; reinterpret the result as <2 x i64>, and swap the two i64 elements. The
; assertions expect the shuffle to be emitted first and the word shift after it.
define <2 x i64> @shuffle_i64_of_lshr_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_lshr_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    psrlw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_lshr_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Arithmetically shift each i16 lane right by 15 with llvm.x86.sse2.psrai.w,
; reinterpret the result as <2 x i64>, and swap the two i64 elements. The
; assertions expect the shuffle to be emitted first and the word shift after it.
define <2 x i64> @shuffle_i64_of_ashr_i16(<8 x i16> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_ashr_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    psraw $15, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_ashr_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %x, i32 15)
  %i2 = bitcast <8 x i16> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}

; Shift each i32 lane left by 31 with llvm.x86.sse2.pslli.d, reinterpret the
; result as <2 x i64>, and swap the two i64 elements. The assertions expect
; the shuffle to be emitted first and the doubleword shift after it.
define <2 x i64> @shuffle_i64_of_shl_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_shl_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_shl_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %x, i32 31)
  %i2 = bitcast <4 x i32> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Logically shift each i32 lane right by 31 with llvm.x86.sse2.psrli.d,
; reinterpret the result as <2 x i64>, and swap the two i64 elements. The
; assertions expect the shuffle to be emitted first and the doubleword shift
; after it.
define <2 x i64> @shuffle_i64_of_lshr_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_lshr_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_lshr_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %x, i32 31)
  %i2 = bitcast <4 x i32> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Arithmetically shift each i32 lane right by 31 with llvm.x86.sse2.psrai.d,
; reinterpret the result as <2 x i64>, and swap the two i64 elements. The
; assertions expect the shuffle to be emitted first and the doubleword shift
; after it.
define <2 x i64> @shuffle_i64_of_ashr_i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_ashr_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_ashr_i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %x, i32 31)
  %i2 = bitcast <4 x i32> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}

; Shift each i64 lane left by 63 with llvm.x86.sse2.pslli.q and swap the two
; i64 elements. The bitcast is a same-type (<2 x i64> to <2 x i64>) no-op. Here
; the assertions expect the quadword shift to stay ahead of the shuffle.
define <2 x i64> @shuffle_i64_of_shl_i64(<2 x i64> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_shl_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_shl_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Logically shift each i64 lane right by 63 with llvm.x86.sse2.psrli.q and swap
; the two i64 elements. The bitcast is a same-type (<2 x i64> to <2 x i64>)
; no-op. Here the assertions expect the quadword shift to stay ahead of the
; shuffle.
define <2 x i64> @shuffle_i64_of_lshr_i64(<2 x i64> %x) nounwind {
; SSE2-LABEL: shuffle_i64_of_lshr_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $63, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: shuffle_i64_of_lshr_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    ret{{[l|q]}}
  %i1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}
; Call llvm.x86.sse2.psrai.q with a count of 63 and swap the two i64 elements
; of the result. The bitcast is a same-type (<2 x i64> to <2 x i64>) no-op. The
; declaration of that symbol in this file is marked "does not exist", and the
; assertions expect an ordinary call to it followed by the shuffle (pshufd with
; SSE2, vshufps with AVX2). x86-64 and i686 have separate expectations because
; the call sequence differs.
define <2 x i64> @shuffle_i64_of_ashr_i64(<2 x i64> %x) nounwind {
; X64-SSE2-LABEL: shuffle_i64_of_ashr_i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pushq %rax
; X64-SSE2-NEXT:    movl $63, %edi
; X64-SSE2-NEXT:    callq llvm.x86.sse2.psrai.q@PLT
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    popq %rax
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: shuffle_i64_of_ashr_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    pushq %rax
; X64-AVX2-NEXT:    movl $63, %edi
; X64-AVX2-NEXT:    callq llvm.x86.sse2.psrai.q@PLT
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    popq %rax
; X64-AVX2-NEXT:    retq
;
; X86-SSE2-LABEL: shuffle_i64_of_ashr_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl $63
; X86-SSE2-NEXT:    calll llvm.x86.sse2.psrai.q@PLT
; X86-SSE2-NEXT:    addl $4, %esp
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    retl
;
; X86-AVX2-LABEL: shuffle_i64_of_ashr_i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    pushl $63
; X86-AVX2-NEXT:    calll llvm.x86.sse2.psrai.q@PLT
; X86-AVX2-NEXT:    addl $4, %esp
; X86-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    retl
  %i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
  %i2 = bitcast <2 x i64> %i1 to <2 x i64>
  %i3 = shufflevector <2 x i64> %i2, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %i3
}

; Shift functions called by the tests in this file; each takes the vector to
; shift and an i32 shift count.
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)
; The *_of_ashr_i64 tests expect an ordinary call to the symbol below rather
; than a shift instruction.
declare <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64>, i32) ; does not exist
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
; X64: {{.*}}
; X86: {{.*}}
