xref: /llvm-project/llvm/test/CodeGen/X86/concat-cast.ll (revision 310a9a4f2881de25101be121fdea114d20e6dc5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2      | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1    | FileCheck %s --check-prefixes=SSE,SSE4
4; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx       | FileCheck %s --check-prefixes=AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2      | FileCheck %s --check-prefixes=AVX,AVX2
6; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f   | FileCheck %s --check-prefixes=AVX,AVX512F
7; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl  | FileCheck %s --check-prefixes=AVX,AVX512VL
8
; Concatenation (shuffle mask 0,1,2,3) of two <2 x i32> -> <2 x float> sitofp
; results. Both the SSE and the AVX check groups expect the two integer inputs
; to be joined first ((v)movlhps) and then converted by one 128-bit
; (v)cvtdq2ps, i.e. a single wide cast instead of two narrow ones.
9define <4 x float> @sitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
10; SSE-LABEL: sitofp_v4i32_v4f32:
11; SSE:       # %bb.0:
12; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
14; SSE-NEXT:    retq
15;
16; AVX-LABEL: sitofp_v4i32_v4f32:
17; AVX:       # %bb.0:
18; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
19; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
20; AVX-NEXT:    retq
21  %s0 = sitofp <2 x i32> %x to <2 x float>
22  %s1 = sitofp <2 x i32> %y to <2 x float>
23  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
24  ret <4 x float> %r
25}
26
; Concatenation (shuffle mask 0,1,2,3) of two <2 x i32> -> <2 x float> uitofp
; results. Every configuration is expected to join the two integer inputs
; before doing any conversion work. With avx512f / avx512vl the joined vector
; is converted by a single vcvtudq2ps (on zmm registers for avx512f, on xmm
; registers for avx512vl). The remaining configurations expect one
; multi-instruction expansion (mask or blend of the low 16 bits, a 16-bit
; shift for the high bits, then subps and addps) applied to the joined vector.
27define <4 x float> @uitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
28; SSE2-LABEL: uitofp_v4i32_v4f32:
29; SSE2:       # %bb.0:
30; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
31; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
32; SSE2-NEXT:    pand %xmm0, %xmm1
33; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34; SSE2-NEXT:    psrld $16, %xmm0
35; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
36; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
37; SSE2-NEXT:    addps %xmm1, %xmm0
38; SSE2-NEXT:    retq
39;
40; SSE4-LABEL: uitofp_v4i32_v4f32:
41; SSE4:       # %bb.0:
42; SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
43; SSE4-NEXT:    movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
44; SSE4-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
45; SSE4-NEXT:    psrld $16, %xmm0
46; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
47; SSE4-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
48; SSE4-NEXT:    addps %xmm1, %xmm0
49; SSE4-NEXT:    retq
50;
51; AVX1-LABEL: uitofp_v4i32_v4f32:
52; AVX1:       # %bb.0:
53; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
54; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
55; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
56; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
57; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
58; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
59; AVX1-NEXT:    retq
60;
61; AVX2-LABEL: uitofp_v4i32_v4f32:
62; AVX2:       # %bb.0:
63; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
64; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
65; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
66; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
67; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
68; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
69; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
70; AVX2-NEXT:    vsubps %xmm2, %xmm0, %xmm0
71; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
72; AVX2-NEXT:    retq
73;
74; AVX512F-LABEL: uitofp_v4i32_v4f32:
75; AVX512F:       # %bb.0:
76; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
77; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
78; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
79; AVX512F-NEXT:    vzeroupper
80; AVX512F-NEXT:    retq
81;
82; AVX512VL-LABEL: uitofp_v4i32_v4f32:
83; AVX512VL:       # %bb.0:
84; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
85; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
86; AVX512VL-NEXT:    retq
87  %s0 = uitofp <2 x i32> %x to <2 x float>
88  %s1 = uitofp <2 x i32> %y to <2 x float>
89  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
90  ret <4 x float> %r
91}
92
; Concatenation (shuffle mask 0,1,2,3) of two <2 x float> -> <2 x i32> fptosi
; results. Both the SSE and the AVX check groups expect the two float inputs
; to be joined first ((v)movlhps) and then converted by one 128-bit
; (v)cvttps2dq.
93define <4 x i32> @fptosi_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
94; SSE-LABEL: fptosi_v4f32_v4i32:
95; SSE:       # %bb.0:
96; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
97; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
98; SSE-NEXT:    retq
99;
100; AVX-LABEL: fptosi_v4f32_v4i32:
101; AVX:       # %bb.0:
102; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
104; AVX-NEXT:    retq
105  %s0 = fptosi <2 x float> %x to <2 x i32>
106  %s1 = fptosi <2 x float> %y to <2 x i32>
107  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
108  ret <4 x i32> %r
109}
110
; Concatenation (shuffle mask 0,1,2,3) of two <2 x float> -> <2 x i32> fptoui
; results. Every configuration is expected to join the two float inputs first
; ((v)movlhps). With avx512f / avx512vl the joined vector is converted by a
; single vcvttps2udq (zmm registers for avx512f, xmm for avx512vl). The other
; configurations expect one expansion of the unsigned conversion on the joined
; vector, built from two signed (v)cvttps2dq conversions, a subtract of a
; constant, an arithmetic shift by 31, and an and/or merge.
111define <4 x i32> @fptoui_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
112; SSE-LABEL: fptoui_v4f32_v4i32:
113; SSE:       # %bb.0:
114; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
115; SSE-NEXT:    cvttps2dq %xmm0, %xmm1
116; SSE-NEXT:    movdqa %xmm1, %xmm2
117; SSE-NEXT:    psrad $31, %xmm2
118; SSE-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
119; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
120; SSE-NEXT:    pand %xmm2, %xmm0
121; SSE-NEXT:    por %xmm1, %xmm0
122; SSE-NEXT:    retq
123;
124; AVX1-LABEL: fptoui_v4f32_v4i32:
125; AVX1:       # %bb.0:
126; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
127; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm1
128; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
129; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
130; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
131; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
132; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
133; AVX1-NEXT:    retq
134;
135; AVX2-LABEL: fptoui_v4f32_v4i32:
136; AVX2:       # %bb.0:
137; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
138; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm1
139; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm2
140; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
141; AVX2-NEXT:    vsubps %xmm3, %xmm0, %xmm0
142; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
143; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
144; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
145; AVX2-NEXT:    retq
146;
147; AVX512F-LABEL: fptoui_v4f32_v4i32:
148; AVX512F:       # %bb.0:
149; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
150; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
151; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
152; AVX512F-NEXT:    vzeroupper
153; AVX512F-NEXT:    retq
154;
155; AVX512VL-LABEL: fptoui_v4f32_v4i32:
156; AVX512VL:       # %bb.0:
157; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
158; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
159; AVX512VL-NEXT:    retq
160  %s0 = fptoui <2 x float> %x to <2 x i32>
161  %s1 = fptoui <2 x float> %y to <2 x i32>
162  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163  ret <4 x i32> %r
164}
165
; Concatenation (shuffle mask 0,1,2,3) of two <2 x i32> -> <2 x double> sitofp
; results into a <4 x double>. The SSE check group expects no join at all,
; just one cvtdq2pd per input, with the two halves left in xmm0 and xmm1.
; The AVX check group expects the integer inputs to be joined (vmovlhps) and
; converted by a single xmm -> ymm vcvtdq2pd.
166define <4 x double> @sitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
167; SSE-LABEL: sitofp_v4i32_v4f64:
168; SSE:       # %bb.0:
169; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
170; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
171; SSE-NEXT:    retq
172;
173; AVX-LABEL: sitofp_v4i32_v4f64:
174; AVX:       # %bb.0:
175; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
176; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
177; AVX-NEXT:    retq
178  %s0 = sitofp <2 x i32> %x to <2 x double>
179  %s1 = sitofp <2 x i32> %y to <2 x double>
180  %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
181  ret <4 x double> %r
182}
183
; Concatenation (shuffle mask 0,1,2,3) of two <2 x i32> -> <2 x double> uitofp
; results into a <4 x double>. Expected code per configuration:
;  - sse2 / sse4.1 - each input is zero-extended and converted on its own with
;    an or/subpd pair against the 4.503599627370496E+15 constant; no join.
;  - avx - each input is zero-extended separately, the halves are combined
;    with vinsertf128, then one 256-bit vorpd/vsubpd pair is applied.
;  - avx2 - the inputs are joined first, zero-extended once into ymm, then one
;    256-bit vpor/vsubpd pair is applied.
;  - avx512f / avx512vl - the inputs are joined and converted by a single
;    vcvtudq2pd.
184define <4 x double> @uitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
185; SSE2-LABEL: uitofp_v4i32_v4f64:
186; SSE2:       # %bb.0:
187; SSE2-NEXT:    xorpd %xmm2, %xmm2
188; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
189; SSE2-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
190; SSE2-NEXT:    orpd %xmm3, %xmm0
191; SSE2-NEXT:    subpd %xmm3, %xmm0
192; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
193; SSE2-NEXT:    orpd %xmm3, %xmm1
194; SSE2-NEXT:    subpd %xmm3, %xmm1
195; SSE2-NEXT:    retq
196;
197; SSE4-LABEL: uitofp_v4i32_v4f64:
198; SSE4:       # %bb.0:
199; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
200; SSE4-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
201; SSE4-NEXT:    por %xmm2, %xmm0
202; SSE4-NEXT:    subpd %xmm2, %xmm0
203; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
204; SSE4-NEXT:    por %xmm2, %xmm1
205; SSE4-NEXT:    subpd %xmm2, %xmm1
206; SSE4-NEXT:    retq
207;
208; AVX1-LABEL: uitofp_v4i32_v4f64:
209; AVX1:       # %bb.0:
210; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
211; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
212; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
213; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
214; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
215; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
216; AVX1-NEXT:    retq
217;
218; AVX2-LABEL: uitofp_v4i32_v4f64:
219; AVX2:       # %bb.0:
220; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
221; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
222; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
223; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
224; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
225; AVX2-NEXT:    retq
226;
227; AVX512F-LABEL: uitofp_v4i32_v4f64:
228; AVX512F:       # %bb.0:
229; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
230; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
231; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
232; AVX512F-NEXT:    retq
233;
234; AVX512VL-LABEL: uitofp_v4i32_v4f64:
235; AVX512VL:       # %bb.0:
236; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
237; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
238; AVX512VL-NEXT:    retq
239  %s0 = uitofp <2 x i32> %x to <2 x double>
240  %s1 = uitofp <2 x i32> %y to <2 x double>
241  %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
242  ret <4 x double> %r
243}
244
; Concatenation (shuffle mask 0,1,2,3) of two <2 x double> -> <2 x i32> fptosi
; results. The SSE check group expects each input to be converted on its own
; (two cvttpd2dq) and the results joined afterwards with unpcklpd. The AVX
; check group expects the double inputs to be combined into a ymm register
; (vinsertf128) and converted by a single ymm -> xmm vcvttpd2dq.
245define <4 x i32> @fptosi_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
246; SSE-LABEL: fptosi_v4f64_v4i32:
247; SSE:       # %bb.0:
248; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
249; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
250; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
251; SSE-NEXT:    retq
252;
253; AVX-LABEL: fptosi_v4f64_v4i32:
254; AVX:       # %bb.0:
255; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
256; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
257; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
258; AVX-NEXT:    vzeroupper
259; AVX-NEXT:    retq
260  %s0 = fptosi <2 x double> %x to <2 x i32>
261  %s1 = fptosi <2 x double> %y to <2 x i32>
262  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
263  ret <4 x i32> %r
264}
265
; Concatenation (shuffle mask 0,1,2,3) of two <2 x double> -> <2 x i32> fptoui
; results. Expected code per configuration:
;  - sse2 / sse4.1 - the unsigned conversion is expanded separately for each
;    input (two cvttpd2dq, a subpd of 2.147483648E+9, psrad by 31, pand/por)
;    and the two results are joined at the end with punpcklqdq.
;  - avx / avx2 - the inputs are combined into a ymm register (vinsertf128)
;    and the expansion is emitted once on the combined vector.
;  - avx512f / avx512vl - the inputs are combined with vinsertf128 and
;    converted by a single vcvttpd2udq.
266define <4 x i32> @fptoui_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
267; SSE-LABEL: fptoui_v4f64_v4i32:
268; SSE:       # %bb.0:
269; SSE-NEXT:    movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
270; SSE-NEXT:    cvttpd2dq %xmm0, %xmm3
271; SSE-NEXT:    subpd %xmm2, %xmm0
272; SSE-NEXT:    cvttpd2dq %xmm0, %xmm4
273; SSE-NEXT:    movapd %xmm3, %xmm0
274; SSE-NEXT:    psrad $31, %xmm0
275; SSE-NEXT:    pand %xmm4, %xmm0
276; SSE-NEXT:    por %xmm3, %xmm0
277; SSE-NEXT:    cvttpd2dq %xmm1, %xmm3
278; SSE-NEXT:    subpd %xmm2, %xmm1
279; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
280; SSE-NEXT:    movapd %xmm3, %xmm2
281; SSE-NEXT:    psrad $31, %xmm2
282; SSE-NEXT:    pand %xmm1, %xmm2
283; SSE-NEXT:    por %xmm3, %xmm2
284; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
285; SSE-NEXT:    retq
286;
287; AVX1-LABEL: fptoui_v4f64_v4i32:
288; AVX1:       # %bb.0:
289; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
290; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
291; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm1
292; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
293; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
294; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
295; AVX1-NEXT:    vandpd %xmm2, %xmm0, %xmm0
296; AVX1-NEXT:    vorpd %xmm0, %xmm1, %xmm0
297; AVX1-NEXT:    vzeroupper
298; AVX1-NEXT:    retq
299;
300; AVX2-LABEL: fptoui_v4f64_v4i32:
301; AVX2:       # %bb.0:
302; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
303; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
304; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm1
305; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm2
306; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
307; AVX2-NEXT:    vsubpd %ymm3, %ymm0, %ymm0
308; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
309; AVX2-NEXT:    vandpd %xmm2, %xmm0, %xmm0
310; AVX2-NEXT:    vorpd %xmm0, %xmm1, %xmm0
311; AVX2-NEXT:    vzeroupper
312; AVX2-NEXT:    retq
313;
314; AVX512F-LABEL: fptoui_v4f64_v4i32:
315; AVX512F:       # %bb.0:
316; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
317; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
318; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
319; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
320; AVX512F-NEXT:    vzeroupper
321; AVX512F-NEXT:    retq
322;
323; AVX512VL-LABEL: fptoui_v4f64_v4i32:
324; AVX512VL:       # %bb.0:
325; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
326; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
327; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
328; AVX512VL-NEXT:    vzeroupper
329; AVX512VL-NEXT:    retq
330  %s0 = fptoui <2 x double> %x to <2 x i32>
331  %s1 = fptoui <2 x double> %y to <2 x i32>
332  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
333  ret <4 x i32> %r
334}
335
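336; Negative test - the two halves use different cast opcodes (uitofp and sitofp), so they are not merged into one wide cast.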
337
; The low half (%s0) is produced by uitofp and the high half (%s1) by sitofp.
; In every configuration the checks expect %x and %y to be converted by
; separate instruction sequences (an unsigned conversion for %x, (v)cvtdq2ps
; for %y) and the two float halves to be joined only afterwards
; ((v)unpcklpd or vmovlhps).
338define <4 x float> @mismatch_tofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
339; SSE2-LABEL: mismatch_tofp_v4i32_v4f32:
340; SSE2:       # %bb.0:
341; SSE2-NEXT:    xorpd %xmm2, %xmm2
342; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
343; SSE2-NEXT:    movapd {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
344; SSE2-NEXT:    orpd %xmm2, %xmm0
345; SSE2-NEXT:    subpd %xmm2, %xmm0
346; SSE2-NEXT:    cvtpd2ps %xmm0, %xmm0
347; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
348; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
349; SSE2-NEXT:    retq
350;
351; SSE4-LABEL: mismatch_tofp_v4i32_v4f32:
352; SSE4:       # %bb.0:
353; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
354; SSE4-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
355; SSE4-NEXT:    por %xmm2, %xmm0
356; SSE4-NEXT:    subpd %xmm2, %xmm0
357; SSE4-NEXT:    cvtpd2ps %xmm0, %xmm0
358; SSE4-NEXT:    cvtdq2ps %xmm1, %xmm1
359; SSE4-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
360; SSE4-NEXT:    retq
361;
362; AVX1-LABEL: mismatch_tofp_v4i32_v4f32:
363; AVX1:       # %bb.0:
364; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
365; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
366; AVX1-NEXT:    # xmm2 = mem[0,0]
367; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
368; AVX1-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
369; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
370; AVX1-NEXT:    vcvtdq2ps %xmm1, %xmm1
371; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
372; AVX1-NEXT:    retq
373;
374; AVX2-LABEL: mismatch_tofp_v4i32_v4f32:
375; AVX2:       # %bb.0:
376; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
377; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
378; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
379; AVX2-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
380; AVX2-NEXT:    vcvtpd2ps %xmm0, %xmm0
381; AVX2-NEXT:    vcvtdq2ps %xmm1, %xmm1
382; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
383; AVX2-NEXT:    retq
384;
385; AVX512F-LABEL: mismatch_tofp_v4i32_v4f32:
386; AVX512F:       # %bb.0:
387; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
388; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
389; AVX512F-NEXT:    vcvtdq2ps %xmm1, %xmm1
390; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
391; AVX512F-NEXT:    vzeroupper
392; AVX512F-NEXT:    retq
393;
394; AVX512VL-LABEL: mismatch_tofp_v4i32_v4f32:
395; AVX512VL:       # %bb.0:
396; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
397; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
398; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
399; AVX512VL-NEXT:    retq
400  %s0 = uitofp <2 x i32> %x to <2 x float>
401  %s1 = sitofp <2 x i32> %y to <2 x float>
402  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
403  ret <4 x float> %r
404}
405
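406; Negative test - one of the narrow casts has an extra use (its result is also stored), so the casts are not merged into one wide cast.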
407
; Both halves are sitofp results, but %s1 has a second user - it is stored to
; %p in addition to feeding the shuffle. The SSE and AVX check groups both
; expect two separate (v)cvtdq2ps instructions, a (v)movlps store of the
; converted %y, and the join ((v)movlhps) performed on the converted values.
408define <4 x float> @sitofp_v4i32_v4f32_extra_use(<2 x i32> %x, <2 x i32> %y, ptr %p) {
409; SSE-LABEL: sitofp_v4i32_v4f32_extra_use:
410; SSE:       # %bb.0:
411; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
412; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
413; SSE-NEXT:    movlps %xmm1, (%rdi)
414; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
415; SSE-NEXT:    retq
416;
417; AVX-LABEL: sitofp_v4i32_v4f32_extra_use:
418; AVX:       # %bb.0:
419; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
420; AVX-NEXT:    vcvtdq2ps %xmm1, %xmm1
421; AVX-NEXT:    vmovlps %xmm1, (%rdi)
422; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
423; AVX-NEXT:    retq
424  %s0 = sitofp <2 x i32> %x to <2 x float>
425  %s1 = sitofp <2 x i32> %y to <2 x float>
426  store <2 x float> %s1, ptr %p
427  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
428  ret <4 x float> %r
429}
430
; NOTE(review): the name presumably refers to LLVM bug report PR45794 - confirm.
; Each <2 x i64> input is arithmetic-shifted right by 48 and converted with
; sitofp to <2 x float>; the two results are concatenated (shuffle mask
; 0,1,2,3). Every configuration expects a single 128-bit (v)cvtdq2ps:
;  - sse2 / sse4.1 / avx / avx2 - a 32-bit (v)psrad by 16 on each input, then
;    (v)shufps selecting elements 1 and 3 of each input to form the vector
;    that is converted.
;  - avx512f / avx512vl - a 64-bit vpsraq by 48 on each input, then vpackssdw
;    to narrow and join the two inputs before the conversion.
431define <4 x float> @PR45794(<2 x i64> %x, <2 x i64> %y) {
432; SSE-LABEL: PR45794:
433; SSE:       # %bb.0:
434; SSE-NEXT:    psrad $16, %xmm0
435; SSE-NEXT:    psrad $16, %xmm1
436; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
437; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
438; SSE-NEXT:    retq
439;
440; AVX1-LABEL: PR45794:
441; AVX1:       # %bb.0:
442; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
443; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
444; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
445; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
446; AVX1-NEXT:    retq
447;
448; AVX2-LABEL: PR45794:
449; AVX2:       # %bb.0:
450; AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
451; AVX2-NEXT:    vpsrad $16, %xmm1, %xmm1
452; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
453; AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
454; AVX2-NEXT:    retq
455;
456; AVX512F-LABEL: PR45794:
457; AVX512F:       # %bb.0:
458; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
459; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
460; AVX512F-NEXT:    vpsraq $48, %zmm0, %zmm0
461; AVX512F-NEXT:    vpsraq $48, %zmm1, %zmm1
462; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
463; AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm0
464; AVX512F-NEXT:    vzeroupper
465; AVX512F-NEXT:    retq
466;
467; AVX512VL-LABEL: PR45794:
468; AVX512VL:       # %bb.0:
469; AVX512VL-NEXT:    vpsraq $48, %xmm0, %xmm0
470; AVX512VL-NEXT:    vpsraq $48, %xmm1, %xmm1
471; AVX512VL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
472; AVX512VL-NEXT:    vcvtdq2ps %xmm0, %xmm0
473; AVX512VL-NEXT:    retq
474  %a0 = ashr <2 x i64> %x, <i64 48, i64 48>
475  %s0 = sitofp <2 x i64> %a0 to <2 x float>
476  %a1 = ashr <2 x i64> %y, <i64 48, i64 48>
477  %s1 = sitofp <2 x i64> %a1 to <2 x float>
478  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
479  ret <4 x float> %r
480}
481