; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

; Just one 32-bit run to make sure we do reasonable things for these sub-128-bit cases.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2

declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

;
; Variable Shifts
;
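; fshr(x, x, amt) is a rotate-right of each element by a variable amount. The
; checks below show the expected lowerings: targets without a native variable
; rotate (SSE2/SSE4.1/AVX1) negate the amount and build per-lane powers of two
; with pslld $23 + paddd + cvttps2dq, then rotate via the 64-bit pmuludq
; products; AVX2 combines vpsrlvd/vpsllvd, AVX512 uses vprorvd, and XOP vprotd.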

define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm2
; SSE2-NEXT:    pslld $23, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    pslld $23, %xmm2
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubd %xmm1, %xmm2
; X86-SSE2-NEXT:    pslld $23, %xmm2
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
  ret <2 x i32> %res
}

;
; Uniform Variable Shifts
;
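; With a splatted rotate amount the pre-AVX512 lowerings can use a single
; 64-bit shift count: each element is duplicated into a qword lane, shifted
; with psrlq, and the results recombined with shufps. AVX512 broadcasts the
; amount and reuses vprorvd; XOP splats, negates, and uses vprotd.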

define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; SSE2-NEXT:    psrlq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    psrlq %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; SSE41-NEXT:    psrlq %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE41-NEXT:    psrlq %xmm1, %xmm0
; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; AVX2-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X86-SSE2-NEXT:    psrlq %xmm1, %xmm0
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
  ret <2 x i32> %res
}

;
; Constant Shifts
;
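; For constant, non-uniform amounts the per-lane scale factors come from the
; constant pool: SSE2/SSE4.1/AVX1 rotate via pmuludq and a final por, AVX2 uses
; constant vpsrlvd/vpsllvd vectors, AVX512 feeds a constant vector to vprorvd,
; and XOP uses a constant vprotd.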

define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
  ret <2 x i32> %res
}

;
; Uniform Constant Shifts
;
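; A uniform constant amount becomes plain immediate shifts on SSE/AVX targets
; (psrld $4 or'd with pslld $28), a single vprord $4 on AVX512, and a
; vprotd $28 (rotate-left by 32-4) on XOP.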

define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: splatconstant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $4, %xmm1
; SSE2-NEXT:    pslld $28, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $4, %xmm1
; SSE41-NEXT:    pslld $28, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $4, %xmm0, %xmm1
; AVX1-NEXT:    vpslld $28, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $4, %xmm0, %xmm1
; AVX2-NEXT:    vpslld $28, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $28, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $4, %xmm1
; X86-SSE2-NEXT:    pslld $28, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
  ret <2 x i32> %res
}