xref: /llvm-project/llvm/test/CodeGen/X86/vec_int_to_fp.ll (revision 397bcfef741315a68e75d174b8f746a11b42c0e2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
5; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
6; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
7; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
8; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
9; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
10;
11; 32-bit tests to make sure we're not doing anything stupid.
12; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown
13; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse
14; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2
15; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1
16
17;
18; Signed Integer to Double
19;
20
; Signed i32 -> f32 is a single (v)cvtdq2ps on every subtarget tested here.
21define <2 x float> @sitofp_2i32_to_2f32(<2 x i32> %a) {
22; SSE-LABEL: sitofp_2i32_to_2f32:
23; SSE:       # %bb.0:
24; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
25; SSE-NEXT:    retq
26;
27; AVX-LABEL: sitofp_2i32_to_2f32:
28; AVX:       # %bb.0:
29; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
30; AVX-NEXT:    retq
31  %cvt = sitofp <2 x i32> %a to <2 x float>
32  ret <2 x float> %cvt
33}
34
; Unsigned i32 -> f32: pre-AVX512 checks show the double-precision bias trick
; (zero-extend to i64 lanes, OR in the 2^52 exponent constant, subtract it,
; then cvtpd2ps); AVX512 targets use native vcvtudq2ps (widened to zmm when
; AVX512VL is unavailable, hence the kill/vzeroupper lines).
35define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) {
36; SSE2-LABEL: uitofp_2i32_to_2f32:
37; SSE2:       # %bb.0:
38; SSE2-NEXT:    xorpd %xmm1, %xmm1
39; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
40; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
41; SSE2-NEXT:    orpd %xmm1, %xmm0
42; SSE2-NEXT:    subpd %xmm1, %xmm0
43; SSE2-NEXT:    cvtpd2ps %xmm0, %xmm0
44; SSE2-NEXT:    retq
45;
46; SSE41-LABEL: uitofp_2i32_to_2f32:
47; SSE41:       # %bb.0:
48; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
49; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
50; SSE41-NEXT:    por %xmm1, %xmm0
51; SSE41-NEXT:    subpd %xmm1, %xmm0
52; SSE41-NEXT:    cvtpd2ps %xmm0, %xmm0
53; SSE41-NEXT:    retq
54;
55; AVX1-LABEL: uitofp_2i32_to_2f32:
56; AVX1:       # %bb.0:
57; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
58; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
59; AVX1-NEXT:    # xmm1 = mem[0,0]
60; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
61; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
62; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
63; AVX1-NEXT:    retq
64;
65; AVX2-LABEL: uitofp_2i32_to_2f32:
66; AVX2:       # %bb.0:
67; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
68; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
69; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
70; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
71; AVX2-NEXT:    vcvtpd2ps %xmm0, %xmm0
72; AVX2-NEXT:    retq
73;
74; AVX512F-LABEL: uitofp_2i32_to_2f32:
75; AVX512F:       # %bb.0:
76; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
77; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
78; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
79; AVX512F-NEXT:    vzeroupper
80; AVX512F-NEXT:    retq
81;
82; AVX512VL-LABEL: uitofp_2i32_to_2f32:
83; AVX512VL:       # %bb.0:
84; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
85; AVX512VL-NEXT:    retq
86;
87; AVX512DQ-LABEL: uitofp_2i32_to_2f32:
88; AVX512DQ:       # %bb.0:
89; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
90; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
91; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
92; AVX512DQ-NEXT:    vzeroupper
93; AVX512DQ-NEXT:    retq
94;
95; AVX512VLDQ-LABEL: uitofp_2i32_to_2f32:
96; AVX512VLDQ:       # %bb.0:
97; AVX512VLDQ-NEXT:    vcvtudq2ps %xmm0, %xmm0
98; AVX512VLDQ-NEXT:    retq
99  %cvt = uitofp <2 x i32> %a to <2 x float>
100  ret <2 x float> %cvt
101}
102
; Signed i64 -> f64: no packed conversion exists before AVX512DQ, so non-DQ
; targets scalarize (extract each element, cvtsi2sd, re-pack with unpcklpd);
; DQ targets use vcvtqq2pd (via zmm without VL, natively on xmm with VL).
103define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
104; SSE2-LABEL: sitofp_2i64_to_2f64:
105; SSE2:       # %bb.0:
106; SSE2-NEXT:    movq %xmm0, %rax
107; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
108; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
109; SSE2-NEXT:    movq %xmm0, %rax
110; SSE2-NEXT:    xorps %xmm0, %xmm0
111; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
112; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
113; SSE2-NEXT:    movapd %xmm1, %xmm0
114; SSE2-NEXT:    retq
115;
116; SSE41-LABEL: sitofp_2i64_to_2f64:
117; SSE41:       # %bb.0:
118; SSE41-NEXT:    pextrq $1, %xmm0, %rax
119; SSE41-NEXT:    cvtsi2sd %rax, %xmm1
120; SSE41-NEXT:    movq %xmm0, %rax
121; SSE41-NEXT:    xorps %xmm0, %xmm0
122; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
123; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
124; SSE41-NEXT:    retq
125;
126; VEX-LABEL: sitofp_2i64_to_2f64:
127; VEX:       # %bb.0:
128; VEX-NEXT:    vpextrq $1, %xmm0, %rax
129; VEX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
130; VEX-NEXT:    vmovq %xmm0, %rax
131; VEX-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
132; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
133; VEX-NEXT:    retq
134;
135; AVX512F-LABEL: sitofp_2i64_to_2f64:
136; AVX512F:       # %bb.0:
137; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
138; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
139; AVX512F-NEXT:    vmovq %xmm0, %rax
140; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
141; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
142; AVX512F-NEXT:    retq
143;
144; AVX512VL-LABEL: sitofp_2i64_to_2f64:
145; AVX512VL:       # %bb.0:
146; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
147; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
148; AVX512VL-NEXT:    vmovq %xmm0, %rax
149; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
150; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
151; AVX512VL-NEXT:    retq
152;
153; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
154; AVX512DQ:       # %bb.0:
155; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
156; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
157; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
158; AVX512DQ-NEXT:    vzeroupper
159; AVX512DQ-NEXT:    retq
160;
161; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
162; AVX512VLDQ:       # %bb.0:
163; AVX512VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
164; AVX512VLDQ-NEXT:    retq
165  %cvt = sitofp <2 x i64> %a to <2 x double>
166  ret <2 x double> %cvt
167}
168
; Low-half i32 -> f64: the shuffle folds away and a single cvtdq2pd remains.
169define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
170; SSE-LABEL: sitofp_2i32_to_2f64:
171; SSE:       # %bb.0:
172; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
173; SSE-NEXT:    retq
174;
175; AVX-LABEL: sitofp_2i32_to_2f64:
176; AVX:       # %bb.0:
177; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
178; AVX-NEXT:    retq
179  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
180  %cvt = sitofp <2 x i32> %shuf to <2 x double>
181  ret <2 x double> %cvt
182}
183
; Convert-then-truncate: only the low half of the 4xi32->4xf64 result is
; kept, so codegen shrinks it to one cvtdq2pd.
184define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
185; SSE-LABEL: sitofp_4i32_to_2f64:
186; SSE:       # %bb.0:
187; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
188; SSE-NEXT:    retq
189;
190; AVX-LABEL: sitofp_4i32_to_2f64:
191; AVX:       # %bb.0:
192; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
193; AVX-NEXT:    retq
194  %cvt = sitofp <4 x i32> %a to <4 x double>
195  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
196  ret <2 x double> %shuf
197}
198
; i16 -> f64: sign-extend to i32 first (pshuflw+psrad on SSE2, pmovsxwd with
; SSE4.1/AVX), then cvtdq2pd.
199define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
200; SSE2-LABEL: sitofp_2i16_to_2f64:
201; SSE2:       # %bb.0:
202; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
203; SSE2-NEXT:    psrad $16, %xmm0
204; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
205; SSE2-NEXT:    retq
206;
207; SSE41-LABEL: sitofp_2i16_to_2f64:
208; SSE41:       # %bb.0:
209; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
210; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
211; SSE41-NEXT:    retq
212;
213; AVX-LABEL: sitofp_2i16_to_2f64:
214; AVX:       # %bb.0:
215; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
216; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
217; AVX-NEXT:    retq
218  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
219  %cvt = sitofp <2 x i16> %shuf to <2 x double>
220  ret <2 x double> %cvt
221}
222
; 8xi16 -> 8xf64 with only the low 2 lanes kept: non-AVX512 shrinks to the
; 2-element form above; AVX512 converts the full vector in zmm then keeps xmm0.
223define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
224; SSE2-LABEL: sitofp_8i16_to_2f64:
225; SSE2:       # %bb.0:
226; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
227; SSE2-NEXT:    psrad $16, %xmm0
228; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
229; SSE2-NEXT:    retq
230;
231; SSE41-LABEL: sitofp_8i16_to_2f64:
232; SSE41:       # %bb.0:
233; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
234; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
235; SSE41-NEXT:    retq
236;
237; VEX-LABEL: sitofp_8i16_to_2f64:
238; VEX:       # %bb.0:
239; VEX-NEXT:    vpmovsxwd %xmm0, %xmm0
240; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
241; VEX-NEXT:    retq
242;
243; AVX512-LABEL: sitofp_8i16_to_2f64:
244; AVX512:       # %bb.0:
245; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
246; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
247; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
248; AVX512-NEXT:    vzeroupper
249; AVX512-NEXT:    retq
250  %cvt = sitofp <8 x i16> %a to <8 x double>
251  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
252  ret <2 x double> %shuf
253}
254
; i8 -> f64: sign-extend to i32 (punpcklbw+pshuflw+psrad $24 on SSE2,
; pmovsxbd with SSE4.1/AVX), then cvtdq2pd.
255define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
256; SSE2-LABEL: sitofp_2i8_to_2f64:
257; SSE2:       # %bb.0:
258; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
259; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
260; SSE2-NEXT:    psrad $24, %xmm0
261; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
262; SSE2-NEXT:    retq
263;
264; SSE41-LABEL: sitofp_2i8_to_2f64:
265; SSE41:       # %bb.0:
266; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
267; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
268; SSE41-NEXT:    retq
269;
270; AVX-LABEL: sitofp_2i8_to_2f64:
271; AVX:       # %bb.0:
272; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
273; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
274; AVX-NEXT:    retq
275  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
276  %cvt = sitofp <2 x i8> %shuf to <2 x double>
277  ret <2 x double> %cvt
278}
279
; 16xi8 -> 16xf64 with only 2 result lanes kept: non-AVX512 shrinks to the
; 2-element sequence; AVX512 does vpmovsxbd to zmm + one vcvtdq2pd.
280define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
281; SSE2-LABEL: sitofp_16i8_to_2f64:
282; SSE2:       # %bb.0:
283; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
284; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
285; SSE2-NEXT:    psrad $24, %xmm0
286; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
287; SSE2-NEXT:    retq
288;
289; SSE41-LABEL: sitofp_16i8_to_2f64:
290; SSE41:       # %bb.0:
291; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
292; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
293; SSE41-NEXT:    retq
294;
295; VEX-LABEL: sitofp_16i8_to_2f64:
296; VEX:       # %bb.0:
297; VEX-NEXT:    vpmovsxbd %xmm0, %xmm0
298; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
299; VEX-NEXT:    retq
300;
301; AVX512-LABEL: sitofp_16i8_to_2f64:
302; AVX512:       # %bb.0:
303; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
304; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
305; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
306; AVX512-NEXT:    vzeroupper
307; AVX512-NEXT:    retq
308  %cvt = sitofp <16 x i8> %a to <16 x double>
309  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
310  ret <2 x double> %shuf
311}
312
; Signed 4xi64 -> 4xf64: scalarized per element (4x cvtsi2sd) and re-packed
; everywhere except AVX512DQ targets, which use a single vcvtqq2pd.
313define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
314; SSE2-LABEL: sitofp_4i64_to_4f64:
315; SSE2:       # %bb.0:
316; SSE2-NEXT:    movq %xmm0, %rax
317; SSE2-NEXT:    cvtsi2sd %rax, %xmm2
318; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
319; SSE2-NEXT:    movq %xmm0, %rax
320; SSE2-NEXT:    xorps %xmm0, %xmm0
321; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
322; SSE2-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
323; SSE2-NEXT:    movq %xmm1, %rax
324; SSE2-NEXT:    cvtsi2sd %rax, %xmm3
325; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
326; SSE2-NEXT:    movq %xmm0, %rax
327; SSE2-NEXT:    xorps %xmm0, %xmm0
328; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
329; SSE2-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
330; SSE2-NEXT:    movapd %xmm2, %xmm0
331; SSE2-NEXT:    movapd %xmm3, %xmm1
332; SSE2-NEXT:    retq
333;
334; SSE41-LABEL: sitofp_4i64_to_4f64:
335; SSE41:       # %bb.0:
336; SSE41-NEXT:    pextrq $1, %xmm0, %rax
337; SSE41-NEXT:    cvtsi2sd %rax, %xmm2
338; SSE41-NEXT:    movq %xmm0, %rax
339; SSE41-NEXT:    xorps %xmm0, %xmm0
340; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
341; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
342; SSE41-NEXT:    pextrq $1, %xmm1, %rax
343; SSE41-NEXT:    xorps %xmm2, %xmm2
344; SSE41-NEXT:    cvtsi2sd %rax, %xmm2
345; SSE41-NEXT:    movq %xmm1, %rax
346; SSE41-NEXT:    xorps %xmm1, %xmm1
347; SSE41-NEXT:    cvtsi2sd %rax, %xmm1
348; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
349; SSE41-NEXT:    retq
350;
351; AVX1-LABEL: sitofp_4i64_to_4f64:
352; AVX1:       # %bb.0:
353; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
354; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
355; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
356; AVX1-NEXT:    vmovq %xmm1, %rax
357; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
358; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
359; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
360; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
361; AVX1-NEXT:    vmovq %xmm0, %rax
362; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
363; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
364; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
365; AVX1-NEXT:    retq
366;
367; AVX2-LABEL: sitofp_4i64_to_4f64:
368; AVX2:       # %bb.0:
369; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
370; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
371; AVX2-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
372; AVX2-NEXT:    vmovq %xmm1, %rax
373; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
374; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
375; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
376; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
377; AVX2-NEXT:    vmovq %xmm0, %rax
378; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
379; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
380; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
381; AVX2-NEXT:    retq
382;
383; AVX512F-LABEL: sitofp_4i64_to_4f64:
384; AVX512F:       # %bb.0:
385; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
386; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
387; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
388; AVX512F-NEXT:    vmovq %xmm1, %rax
389; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
390; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
391; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
392; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
393; AVX512F-NEXT:    vmovq %xmm0, %rax
394; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
395; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
396; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
397; AVX512F-NEXT:    retq
398;
399; AVX512VL-LABEL: sitofp_4i64_to_4f64:
400; AVX512VL:       # %bb.0:
401; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
402; AVX512VL-NEXT:    vpextrq $1, %xmm1, %rax
403; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
404; AVX512VL-NEXT:    vmovq %xmm1, %rax
405; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
406; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
407; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
408; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
409; AVX512VL-NEXT:    vmovq %xmm0, %rax
410; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
411; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
412; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
413; AVX512VL-NEXT:    retq
414;
415; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
416; AVX512DQ:       # %bb.0:
417; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
418; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
419; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
420; AVX512DQ-NEXT:    retq
421;
422; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64:
423; AVX512VLDQ:       # %bb.0:
424; AVX512VLDQ-NEXT:    vcvtqq2pd %ymm0, %ymm0
425; AVX512VLDQ-NEXT:    retq
426  %cvt = sitofp <4 x i64> %a to <4 x double>
427  ret <4 x double> %cvt
428}
429
; 4xi32 -> 4xf64: SSE splits into two cvtdq2pd (low half, then the shuffled
; high half); AVX widens in one vcvtdq2pd to ymm.
430define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
431; SSE-LABEL: sitofp_4i32_to_4f64:
432; SSE:       # %bb.0:
433; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
434; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
435; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
436; SSE-NEXT:    movaps %xmm2, %xmm0
437; SSE-NEXT:    retq
438;
439; AVX-LABEL: sitofp_4i32_to_4f64:
440; AVX:       # %bb.0:
441; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
442; AVX-NEXT:    retq
443  %cvt = sitofp <4 x i32> %a to <4 x double>
444  ret <4 x double> %cvt
445}
446
; 4xi16 -> 4xf64: sign-extend to i32 first, then one cvtdq2pd per xmm half
; on SSE, or a single vcvtdq2pd to ymm on AVX.
447define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
448; SSE2-LABEL: sitofp_4i16_to_4f64:
449; SSE2:       # %bb.0:
450; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
451; SSE2-NEXT:    psrad $16, %xmm1
452; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
453; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
454; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
455; SSE2-NEXT:    retq
456;
457; SSE41-LABEL: sitofp_4i16_to_4f64:
458; SSE41:       # %bb.0:
459; SSE41-NEXT:    pmovsxwd %xmm0, %xmm1
460; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
461; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
462; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
463; SSE41-NEXT:    retq
464;
465; AVX-LABEL: sitofp_4i16_to_4f64:
466; AVX:       # %bb.0:
467; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
468; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
469; AVX-NEXT:    retq
470  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
471  %cvt = sitofp <4 x i16> %shuf to <4 x double>
472  ret <4 x double> %cvt
473}
474
; 8xi16 -> 8xf64 with only the low 4 lanes kept: pre-AVX512 shrinks to the
; 4-element form; AVX512 converts 8 lanes in zmm and keeps ymm0.
475define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
476; SSE2-LABEL: sitofp_8i16_to_4f64:
477; SSE2:       # %bb.0:
478; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
479; SSE2-NEXT:    psrad $16, %xmm1
480; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
481; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
482; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
483; SSE2-NEXT:    retq
484;
485; SSE41-LABEL: sitofp_8i16_to_4f64:
486; SSE41:       # %bb.0:
487; SSE41-NEXT:    pmovsxwd %xmm0, %xmm1
488; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
489; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
490; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
491; SSE41-NEXT:    retq
492;
493; VEX-LABEL: sitofp_8i16_to_4f64:
494; VEX:       # %bb.0:
495; VEX-NEXT:    vpmovsxwd %xmm0, %xmm0
496; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
497; VEX-NEXT:    retq
498;
499; AVX512-LABEL: sitofp_8i16_to_4f64:
500; AVX512:       # %bb.0:
501; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
502; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
503; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
504; AVX512-NEXT:    retq
505  %cvt = sitofp <8 x i16> %a to <8 x double>
506  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
507  ret <4 x double> %shuf
508}
509
; 4xi8 -> 4xf64: sign-extend to i32 (punpcklbw/punpcklwd + psrad $24 on SSE2,
; pmovsxbd later), then cvtdq2pd per half (SSE) or once to ymm (AVX).
510define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
511; SSE2-LABEL: sitofp_4i8_to_4f64:
512; SSE2:       # %bb.0:
513; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
514; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
515; SSE2-NEXT:    psrad $24, %xmm1
516; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
517; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
518; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
519; SSE2-NEXT:    retq
520;
521; SSE41-LABEL: sitofp_4i8_to_4f64:
522; SSE41:       # %bb.0:
523; SSE41-NEXT:    pmovsxbd %xmm0, %xmm1
524; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
525; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
526; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
527; SSE41-NEXT:    retq
528;
529; AVX-LABEL: sitofp_4i8_to_4f64:
530; AVX:       # %bb.0:
531; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
532; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
533; AVX-NEXT:    retq
534  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
535  %cvt = sitofp <4 x i8> %shuf to <4 x double>
536  ret <4 x double> %cvt
537}
538
; 16xi8 -> 16xf64 with only the low 4 lanes kept: shrinks to the 4-element
; sequence pre-AVX512; AVX512 uses vpmovsxbd to zmm + one vcvtdq2pd.
539define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
540; SSE2-LABEL: sitofp_16i8_to_4f64:
541; SSE2:       # %bb.0:
542; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
543; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
544; SSE2-NEXT:    psrad $24, %xmm1
545; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
546; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
547; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
548; SSE2-NEXT:    retq
549;
550; SSE41-LABEL: sitofp_16i8_to_4f64:
551; SSE41:       # %bb.0:
552; SSE41-NEXT:    pmovsxbd %xmm0, %xmm1
553; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
554; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
555; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
556; SSE41-NEXT:    retq
557;
558; VEX-LABEL: sitofp_16i8_to_4f64:
559; VEX:       # %bb.0:
560; VEX-NEXT:    vpmovsxbd %xmm0, %xmm0
561; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
562; VEX-NEXT:    retq
563;
564; AVX512-LABEL: sitofp_16i8_to_4f64:
565; AVX512:       # %bb.0:
566; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
567; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
568; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
569; AVX512-NEXT:    retq
570  %cvt = sitofp <16 x i8> %a to <16 x double>
571  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
572  ret <4 x double> %shuf
573}
574
575;
576; Unsigned Integer to Double
577;
578
; Unsigned i64 -> f64 without DQ: split each element into low/high 32-bit
; halves, OR each half with a magic-exponent constant from the constant pool,
; subtract the bias from the high part, and add the parts back together.
; AVX512DQ targets replace all of that with vcvtuqq2pd.
579define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
580; SSE2-LABEL: uitofp_2i64_to_2f64:
581; SSE2:       # %bb.0:
582; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
583; SSE2-NEXT:    pand %xmm0, %xmm1
584; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
585; SSE2-NEXT:    psrlq $32, %xmm0
586; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
587; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
588; SSE2-NEXT:    addpd %xmm1, %xmm0
589; SSE2-NEXT:    retq
590;
591; SSE41-LABEL: uitofp_2i64_to_2f64:
592; SSE41:       # %bb.0:
593; SSE41-NEXT:    pxor %xmm1, %xmm1
594; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
595; SSE41-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
596; SSE41-NEXT:    psrlq $32, %xmm0
597; SSE41-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
598; SSE41-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
599; SSE41-NEXT:    addpd %xmm1, %xmm0
600; SSE41-NEXT:    retq
601;
602; AVX1-LABEL: uitofp_2i64_to_2f64:
603; AVX1:       # %bb.0:
604; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
605; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
606; AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
607; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
608; AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
609; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
610; AVX1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
611; AVX1-NEXT:    retq
612;
613; AVX2-LABEL: uitofp_2i64_to_2f64:
614; AVX2:       # %bb.0:
615; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
616; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
617; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
618; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
619; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
620; AVX2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
621; AVX2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
622; AVX2-NEXT:    retq
623;
624; AVX512F-LABEL: uitofp_2i64_to_2f64:
625; AVX512F:       # %bb.0:
626; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
627; AVX512F-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
628; AVX512F-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
629; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm0
630; AVX512F-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
631; AVX512F-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
632; AVX512F-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
633; AVX512F-NEXT:    retq
634;
635; AVX512VL-LABEL: uitofp_2i64_to_2f64:
636; AVX512VL:       # %bb.0:
637; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
638; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
639; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
640; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm0
641; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
642; AVX512VL-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
643; AVX512VL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
644; AVX512VL-NEXT:    retq
645;
646; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
647; AVX512DQ:       # %bb.0:
648; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
649; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
650; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
651; AVX512DQ-NEXT:    vzeroupper
652; AVX512DQ-NEXT:    retq
653;
654; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
655; AVX512VLDQ:       # %bb.0:
656; AVX512VLDQ-NEXT:    vcvtuqq2pd %xmm0, %xmm0
657; AVX512VLDQ-NEXT:    retq
658  %cvt = uitofp <2 x i64> %a to <2 x double>
659  ret <2 x double> %cvt
660}
661
; Unsigned i32 -> f64 via the bias trick (zero-extend to i64 lanes, OR in
; 2^52, subtract 2^52) pre-AVX512; AVX512 uses vcvtudq2pd (widened when no VL).
662define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
663; SSE2-LABEL: uitofp_2i32_to_2f64:
664; SSE2:       # %bb.0:
665; SSE2-NEXT:    xorpd %xmm1, %xmm1
666; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
667; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
668; SSE2-NEXT:    orpd %xmm1, %xmm0
669; SSE2-NEXT:    subpd %xmm1, %xmm0
670; SSE2-NEXT:    retq
671;
672; SSE41-LABEL: uitofp_2i32_to_2f64:
673; SSE41:       # %bb.0:
674; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
675; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
676; SSE41-NEXT:    por %xmm1, %xmm0
677; SSE41-NEXT:    subpd %xmm1, %xmm0
678; SSE41-NEXT:    retq
679;
680; AVX1-LABEL: uitofp_2i32_to_2f64:
681; AVX1:       # %bb.0:
682; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
683; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
684; AVX1-NEXT:    # xmm1 = mem[0,0]
685; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
686; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
687; AVX1-NEXT:    retq
688;
689; AVX2-LABEL: uitofp_2i32_to_2f64:
690; AVX2:       # %bb.0:
691; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
692; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
693; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
694; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
695; AVX2-NEXT:    retq
696;
697; AVX512F-LABEL: uitofp_2i32_to_2f64:
698; AVX512F:       # %bb.0:
699; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
700; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
701; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
702; AVX512F-NEXT:    vzeroupper
703; AVX512F-NEXT:    retq
704;
705; AVX512VL-LABEL: uitofp_2i32_to_2f64:
706; AVX512VL:       # %bb.0:
707; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
708; AVX512VL-NEXT:    retq
709;
710; AVX512DQ-LABEL: uitofp_2i32_to_2f64:
711; AVX512DQ:       # %bb.0:
712; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
713; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
714; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
715; AVX512DQ-NEXT:    vzeroupper
716; AVX512DQ-NEXT:    retq
717;
718; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64:
719; AVX512VLDQ:       # %bb.0:
720; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0
721; AVX512VLDQ-NEXT:    retq
722  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
723  %cvt = uitofp <2 x i32> %shuf to <2 x double>
724  ret <2 x double> %cvt
725}
726
; 4xi32 -> 4xf64 unsigned with only the low 2 lanes kept: shrinks to the
; same 2-element lowering as uitofp_2i32_to_2f64 on every subtarget.
727define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
728; SSE2-LABEL: uitofp_4i32_to_2f64:
729; SSE2:       # %bb.0:
730; SSE2-NEXT:    xorpd %xmm1, %xmm1
731; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
732; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
733; SSE2-NEXT:    orpd %xmm1, %xmm0
734; SSE2-NEXT:    subpd %xmm1, %xmm0
735; SSE2-NEXT:    retq
736;
737; SSE41-LABEL: uitofp_4i32_to_2f64:
738; SSE41:       # %bb.0:
739; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
740; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
741; SSE41-NEXT:    por %xmm1, %xmm0
742; SSE41-NEXT:    subpd %xmm1, %xmm0
743; SSE41-NEXT:    retq
744;
745; AVX1-LABEL: uitofp_4i32_to_2f64:
746; AVX1:       # %bb.0:
747; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
748; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
749; AVX1-NEXT:    # xmm1 = mem[0,0]
750; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
751; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
752; AVX1-NEXT:    retq
753;
754; AVX2-LABEL: uitofp_4i32_to_2f64:
755; AVX2:       # %bb.0:
756; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
757; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
758; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
759; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
760; AVX2-NEXT:    retq
761;
762; AVX512F-LABEL: uitofp_4i32_to_2f64:
763; AVX512F:       # %bb.0:
764; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
765; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
766; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
767; AVX512F-NEXT:    vzeroupper
768; AVX512F-NEXT:    retq
769;
770; AVX512VL-LABEL: uitofp_4i32_to_2f64:
771; AVX512VL:       # %bb.0:
772; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
773; AVX512VL-NEXT:    retq
774;
775; AVX512DQ-LABEL: uitofp_4i32_to_2f64:
776; AVX512DQ:       # %bb.0:
777; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
778; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
779; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
780; AVX512DQ-NEXT:    vzeroupper
781; AVX512DQ-NEXT:    retq
782;
783; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64:
784; AVX512VLDQ:       # %bb.0:
785; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0
786; AVX512VLDQ-NEXT:    retq
787  %cvt = uitofp <4 x i32> %a to <4 x double>
788  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
789  ret <2 x double> %shuf
790}
791
; Unsigned i16 -> f64: zero-extend to i32 (punpcklwd with zero on SSE2,
; pmovzxwd later) then a signed cvtdq2pd — safe since the values fit in i32.
792define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
793; SSE2-LABEL: uitofp_2i16_to_2f64:
794; SSE2:       # %bb.0:
795; SSE2-NEXT:    pxor %xmm1, %xmm1
796; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
797; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
798; SSE2-NEXT:    retq
799;
800; SSE41-LABEL: uitofp_2i16_to_2f64:
801; SSE41:       # %bb.0:
802; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
803; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
804; SSE41-NEXT:    retq
805;
806; AVX-LABEL: uitofp_2i16_to_2f64:
807; AVX:       # %bb.0:
808; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
809; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
810; AVX-NEXT:    retq
811  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
812  %cvt = uitofp <2 x i16> %shuf to <2 x double>
813  ret <2 x double> %cvt
814}
815
; uitofp of all 8 i16 lanes with only elements 0-1 of the result kept: the
; conversion should shrink to the low lanes pre-AVX512; AVX512 converts all
; 8 lanes through zmm and discards the upper result (kill + vzeroupper).
define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; SSE2-LABEL: uitofp_8i16_to_2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i16_to_2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_8i16_to_2f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT:    retq
;
; AVX512-LABEL: uitofp_8i16_to_2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}
847
; Unsigned i8 -> f64 on the low two lanes: zero-extend of i8 to i32
; (punpcklbw+punpcklwd or pmovzxbd) followed by cvtdq2pd.
define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_2i8_to_2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_2i8_to_2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_2i8_to_2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i8> %shuf to <2 x double>
  ret <2 x double> %cvt
}
872
; uitofp of all 16 i8 lanes with only elements 0-1 of the result kept: the
; conversion should shrink to the low lanes pre-AVX512; AVX512 widens all 16
; lanes through zmm and discards the upper result.
define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_16i8_to_2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_16i8_to_2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_16i8_to_2f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT:    retq
;
; AVX512-LABEL: uitofp_16i8_to_2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}
905
; Unsigned i64 -> f64 with no native instruction (pre-AVX512DQ): lowered via
; the split lo/hi magic-number technique — OR exponent-bit constants into the
; low-32 and high-32 halves, subtract the bias constant, then add the halves.
; AVX512DQ(+VL) targets use vcvtuqq2pd directly.
define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm0
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm0
; SSE2-NEXT:    addpd %xmm3, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i64_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE41-NEXT:    por %xmm4, %xmm3
; SSE41-NEXT:    psrlq $32, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE41-NEXT:    subpd %xmm6, %xmm0
; SSE41-NEXT:    addpd %xmm3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    psrlq $32, %xmm1
; SSE41-NEXT:    por %xmm5, %xmm1
; SSE41-NEXT:    subpd %xmm6, %xmm1
; SSE41-NEXT:    addpd %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i64_to_4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i64_to_4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; AVX512F-NEXT:    vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %cvt
}
1014
; Unsigned i32 -> f64: zero-extend each lane to i64, OR in the 2^52 exponent
; magic (4.503599627370496E+15), then subtract it to materialize the value.
; AVX512 targets use vcvtudq2pd directly.
define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
; SSE2-LABEL: uitofp_4i32_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm0, %xmm1
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE2-NEXT:    orpd %xmm3, %xmm0
; SSE2-NEXT:    subpd %xmm3, %xmm0
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    orpd %xmm3, %xmm1
; SSE2-NEXT:    subpd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i32_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    subpd %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm3
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    subpd %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i32_to_4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i32_to_4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_4i32_to_4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_4i32_to_4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_4i32_to_4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %cvt
}
1087
; Unsigned i16 -> f64 on the low four lanes: zero-extend to i32 then
; cvtdq2pd; SSE splits into two xmm conversions, AVX uses one ymm result.
define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
; SSE2-LABEL: uitofp_4i16_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i16_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_4i16_to_4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i16> %shuf to <4 x double>
  ret <4 x double> %cvt
}
1116
; uitofp of 8 i16 lanes keeping only elements 0-3: should shrink to a 4-lane
; conversion pre-AVX512; AVX512 converts all 8 lanes and discards the top half.
define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
; SSE2-LABEL: uitofp_8i16_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i16_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_8i16_to_4f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; VEX-NEXT:    retq
;
; AVX512-LABEL: uitofp_8i16_to_4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}
1152
; Unsigned i8 -> f64 on the low four lanes: zero-extend to i32 (pmovzxbd on
; SSE4.1+) then cvtdq2pd; SSE needs two conversions, AVX produces ymm directly.
define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_4i8_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i8_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_4i8_to_4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i8> %shuf to <4 x double>
  ret <4 x double> %cvt
}
1182
; uitofp of 16 i8 lanes keeping only elements 0-3: should shrink to a 4-lane
; conversion pre-AVX512; AVX512 widens all 16 lanes then discards the rest.
define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_16i8_to_4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_16i8_to_4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_16i8_to_4f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; VEX-NEXT:    retq
;
; AVX512-LABEL: uitofp_16i8_to_4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}
1219
;
; Signed Integer to Float
;
1223
; Signed i64 -> f32 (low two lanes, upper result lanes undef): no packed
; instruction before AVX512DQ, so each element goes through scalar cvtsi2ss
; and the results are re-packed; DQ targets use vcvtqq2ps.
define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; SSE2-LABEL: sitofp_2i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_2i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT:    retq
;
; VEX-LABEL: sitofp_2i64_to_4f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vpextrq $1, %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT:    vmovq %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %cvt = sitofp <2 x i64> %a to <2 x float>
  %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %ext
}
1290
; Same as sitofp_2i64_to_4f32 but the upper two result lanes are explicitly
; zero (shuffle with zeroinitializer), so the lowering must zero them (movq /
; insertps with zeroed lanes).
define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
; SSE2-LABEL: sitofp_2i64_to_4f32_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_2i64_to_4f32_zero:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: sitofp_2i64_to_4f32_zero:
; VEX:       # %bb.0:
; VEX-NEXT:    vmovq %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT:    vpextrq $1, %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %cvt = sitofp <2 x i64> %a to <2 x float>
  %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %ext
}
1359
; sitofp of a <4 x i64> whose upper two input lanes are undef: only the low
; two elements need converting, so the scalarized lowering emits just two
; cvtsi2ss; DQ targets use vcvtqq2ps on the (widened) vector.
define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: sitofp_4i64_to_4f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT:    retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
; VEX:       # %bb.0:
; VEX-NEXT:    vpextrq $1, %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT:    vmovq %xmm0, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT:    vcvtqq2ps %ymm0, %xmm0
; AVX512VLDQ-NEXT:    vzeroupper
; AVX512VLDQ-NEXT:    retq
  %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = sitofp <4 x i64> %ext to <4 x float>
  ret <4 x float> %cvt
}
1428
; Signed i32 -> f32: maps directly to a single (v)cvtdq2ps on every target.
define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i32_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = sitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %cvt
}
1442
; Signed i16 -> f32 on the low four lanes: sign-extend to i32 (punpcklwd +
; psrad on SSE2, pmovsxwd on SSE4.1+) then cvtdq2ps.
define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
; SSE2-LABEL: sitofp_4i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_4i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_4i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i16> %shuf to <4 x float>
  ret <4 x float> %cvt
}
1466
; sitofp of 8 i16 lanes keeping only elements 0-3: the conversion should
; shrink to a single 4-lane sign-extend + cvtdq2ps on all targets.
define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE2-LABEL: sitofp_8i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_8i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_8i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x float>
  %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}
1490
; Signed i8 -> f32 on the low four lanes: sign-extend to i32 (double unpack +
; psrad $24 on SSE2, pmovsxbd on SSE4.1+) then cvtdq2ps.
define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
; SSE2-LABEL: sitofp_4i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_4i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_4i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i8> %shuf to <4 x float>
  ret <4 x float> %cvt
}
1515
; sitofp of 16 i8 lanes keeping only elements 0-3: the conversion should
; shrink to a single 4-lane sign-extend + cvtdq2ps on all targets.
define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE2-LABEL: sitofp_16i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_16i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_16i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}
1540
; <4 x i64> -> <4 x float>: without AVX512DQ there is no packed qword->ps
; instruction, so each element is extracted to a GPR and converted with scalar
; cvtsi2ss, then the results are reassembled (unpcklps/insertps). AVX512DQ has
; vcvtqq2ps (zmm form without VL, ymm->xmm form with VL).
define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE2-LABEL: sitofp_4i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; SSE41-NEXT:    movq %xmm1, %rax
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; SSE41-NEXT:    pextrq $1, %xmm1, %rax
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_4i64_to_4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2ps %ymm0, %xmm0
; AVX512VLDQ-NEXT:    vzeroupper
; AVX512VLDQ-NEXT:    retq
  %cvt = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}
1665
; <8 x i32> -> <8 x float>: maps directly to cvtdq2ps — one ymm op with AVX,
; two xmm ops when the vector is split across registers on SSE.
define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
; SSE-LABEL: sitofp_8i32_to_8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_8i32_to_8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %cvt
}
1680
; <8 x i16> -> <8 x float>: SSE2 sign-extends each half via punpck + psrad $16;
; SSE4.1 uses two pmovsxwd; AVX2/AVX512 widen to a full ymm with one vpmovsxwd.
define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
; SSE2-LABEL: sitofp_8i16_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_8i16_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm1
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i16_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i16_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sitofp_8i16_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %cvt
}
1726
; sitofp of the low 8 sign-extended i8 lanes to <8 x float>: SSE2 interleaves
; then psrad $24 per half; SSE4.1 uses two pmovsxbd; AVX2/AVX512 need only one
; ymm-wide vpmovsxbd.
define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
; SSE2-LABEL: sitofp_8i8_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $24, %xmm1
; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_8i8_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm1
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i8_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i8_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sitofp_8i8_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cvt = sitofp <8 x i8> %shuf to <8 x float>
  ret <8 x float> %cvt
}
1773
; Convert all 16 i8 lanes but keep the low 8 floats. Pre-AVX512 codegen shrinks
; to converting only the demanded half; AVX512 converts the full zmm and the
; kill comment marks taking just the low ymm of the result.
define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
; SSE2-LABEL: sitofp_16i8_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $24, %xmm1
; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_16i8_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm1
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_16i8_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_16i8_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sitofp_16i8_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT:    vcvtdq2ps %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuf
}
1821
1822;
1823; Unsigned Integer to Float
1824;
1825
; <2 x i64> -> <2 x float> (upper 2 result lanes undef). cvtsi2ss is signed, so
; values with the sign bit set use the shift/or/double trick: halve (keeping the
; LSB in bit 0 to preserve rounding), convert, then add the result to itself.
; SSE2 does this with branches per element; SSE4.1/VEX do it branchlessly with
; blends; AVX512F has native unsigned vcvtusi2ss; AVX512DQ uses packed vcvtuqq2ps.
define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; SSE2-LABEL: uitofp_2i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB41_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB41_3
; SSE2-NEXT:  .LBB41_1:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB41_3:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB41_4
; SSE2-NEXT:  # %bb.5:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB41_4:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    addss %xmm1, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_2i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [1,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlq $1, %xmm2
; SSE41-NEXT:    por %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm0
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
; SSE41-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero
; SSE41-NEXT:    movaps %xmm2, %xmm3
; SSE41-NEXT:    addps %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm3, %xmm2
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_2i64_to_4f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; VEX-NEXT:    vpsrlq $1, %xmm0, %xmm2
; VEX-NEXT:    vpor %xmm1, %xmm2, %xmm1
; VEX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; VEX-NEXT:    vpextrq $1, %xmm1, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; VEX-NEXT:    vmovq %xmm1, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; VEX-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; VEX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; VEX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; VEX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; VEX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; VEX-NEXT:    retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtuqq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <2 x i64> %a to <2 x float>
  %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %ext
}
1940
; Same as uitofp_2i64_to_4f32 but the upper 2 result lanes must be zero
; (shuffled in from zeroinitializer), hence the trailing movq/vmovq that clears
; the high half of the result register.
define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
; SSE2-LABEL: uitofp_2i64_to_2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB42_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    jmp .LBB42_3
; SSE2-NEXT:  .LBB42_1:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    addss %xmm1, %xmm1
; SSE2-NEXT:  .LBB42_3:
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB42_4
; SSE2-NEXT:  # %bb.5:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB42_6
; SSE2-NEXT:  .LBB42_4:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB42_6:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_2i64_to_2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [1,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlq $1, %xmm2
; SSE41-NEXT:    por %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm0
; SSE41-NEXT:    pextrq $1, %xmm0, %rax
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
; SSE41-NEXT:    movq %xmm0, %rax
; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0],xmm2[0],zero,zero
; SSE41-NEXT:    movaps %xmm3, %xmm2
; SSE41-NEXT:    addps %xmm3, %xmm2
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm3[0],zero
; SSE41-NEXT:    retq
;
; VEX-LABEL: uitofp_2i64_to_2f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; VEX-NEXT:    vpsrlq $1, %xmm0, %xmm2
; VEX-NEXT:    vpor %xmm1, %xmm2, %xmm1
; VEX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; VEX-NEXT:    vpextrq $1, %xmm1, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; VEX-NEXT:    vmovq %xmm1, %rax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; VEX-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; VEX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; VEX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; VEX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; VEX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; VEX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; VEX-NEXT:    retq
;
; AVX512F-LABEL: uitofp_2i64_to_2f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_2i64_to_2f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_2f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtuqq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <2 x i64> %a to <2 x float>
  %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %ext
}
2056
; uitofp of a <2 x i64> widened with undef lanes to <4 x i64> before converting.
; Exercises that the undef upper elements don't pessimize the conversion of the
; two defined lanes; same halve/convert/double trick as the other u64->f32 tests.
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB43_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    jmp .LBB43_3
; SSE2-NEXT:  .LBB43_1:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    addss %xmm1, %xmm1
; SSE2-NEXT:  .LBB43_3:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB43_4
; SSE2-NEXT:  # %bb.5:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB43_6
; SSE2-NEXT:  .LBB43_4:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB43_6:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [1,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlq $1, %xmm2
; SSE41-NEXT:    por %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    pextrq $1, %xmm1, %rax
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
; SSE41-NEXT:    movq %xmm1, %rax
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; SSE41-NEXT:    movaps %xmm1, %xmm2
; SSE41-NEXT:    addps %xmm1, %xmm2
; SSE41-NEXT:    xorps %xmm3, %xmm3
; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[2,3]
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vmovaps %xmm0, %xmm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i64_to_4f32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlq $1, %ymm0, %ymm2
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; AVX512VLDQ-NEXT:    vzeroupper
; AVX512VLDQ-NEXT:    retq
  %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = uitofp <4 x i64> %ext to <4 x float>
  ret <4 x float> %cvt
}
2204
; <4 x i32> -> <4 x float>: pre-AVX512 uses the magic-constant technique —
; split each u32 into 16-bit halves, OR each half with a float exponent bias
; (0x4B000000 / 0x53000000 patterns), subtract the bias, and add the two
; partial results. AVX512 has native vcvtudq2ps.
define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
; SSE2-LABEL: uitofp_4i32_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    addps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i32_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; SSE41-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    addps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i32_to_4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i32_to_4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT:    vsubps %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_4i32_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_4i32_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_4i32_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %cvt
}
2276
; uitofp of the low 4 zero-extended i16 lanes: u16 always fits in a signed i32,
; so a zero-extend (punpcklwd with zero / pmovzxwd) + plain cvtdq2ps suffices.
define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
; SSE2-LABEL: uitofp_4i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_4i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i16> %shuf to <4 x float>
  ret <4 x float> %cvt
}
2300
; uitofp all 8 i16 lanes but keep only the low 4 floats: codegen should narrow
; to converting just the demanded lanes (same output as uitofp_4i16_to_4f32).
define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE2-LABEL: uitofp_8i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_8i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x float>
  %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}
2324
; uitofp <4 x i8> -> <4 x float>: zero-extend the i8 lanes to i32
; (two unpacks on SSE2, pmovzxbd on SSE4.1/AVX), then signed cvtdq2ps.
define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
; SSE2-LABEL: uitofp_4i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_4i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i8> %shuf to <4 x float>
  ret <4 x float> %cvt
}

; uitofp <16 x i8> then keep the low 4 floats: lowering only converts the low
; four lanes, so codegen matches uitofp_4i8_to_4f32.
define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE2-LABEL: uitofp_16i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_16i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_16i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}

; uitofp <4 x i64> -> <4 x float>: without AVX-512 there is no unsigned
; i64->f32 conversion, so pre-AVX512 targets scalarize with cvtsi2ss and apply
; the sign-bit fixup (shift-right-by-one with rounding bit, convert, double);
; AVX-512 targets use vcvtusi2ss per element or vcvtuqq2ps directly (DQ).
define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB49_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
; SSE2-NEXT:    jmp .LBB49_3
; SSE2-NEXT:  .LBB49_1:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
; SSE2-NEXT:    addss %xmm2, %xmm2
; SSE2-NEXT:  .LBB49_3:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB49_4
; SSE2-NEXT:  # %bb.5:
; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
; SSE2-NEXT:    jmp .LBB49_6
; SSE2-NEXT:  .LBB49_4:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
; SSE2-NEXT:    addss %xmm3, %xmm3
; SSE2-NEXT:  .LBB49_6:
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB49_7
; SSE2-NEXT:  # %bb.8:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    jmp .LBB49_9
; SSE2-NEXT:  .LBB49_7:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    addss %xmm1, %xmm1
; SSE2-NEXT:  .LBB49_9:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB49_10
; SSE2-NEXT:  # %bb.11:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB49_12
; SSE2-NEXT:  .LBB49_10:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB49_12:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm4 = [1,1]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    psrlq $1, %xmm3
; SSE41-NEXT:    por %xmm0, %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm5
; SSE41-NEXT:    pextrq $1, %xmm5, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    movq %xmm5, %rax
; SSE41-NEXT:    xorps %xmm3, %xmm3
; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
; SSE41-NEXT:    pand %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm2, %xmm5
; SSE41-NEXT:    psrlq $1, %xmm5
; SSE41-NEXT:    por %xmm4, %xmm5
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
; SSE41-NEXT:    movq %xmm2, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
; SSE41-NEXT:    pextrq $1, %xmm2, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
; SSE41-NEXT:    movaps %xmm3, %xmm2
; SSE41-NEXT:    addps %xmm3, %xmm2
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i64_to_4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlq $1, %ymm0, %ymm2
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; AVX512VLDQ-NEXT:    vzeroupper
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}

; uitofp <8 x i32> -> <8 x float>: pre-AVX512 lowering splits each u32 into
; low/high 16-bit halves, pins them to float exponents via OR with magic
; constants, and recombines with subps/addps; AVX-512 uses vcvtudq2ps.
define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
; SSE2-LABEL: uitofp_8i32_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
; SSE2-NEXT:    por %xmm5, %xmm0
; SSE2-NEXT:    movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; SSE2-NEXT:    subps %xmm6, %xmm0
; SSE2-NEXT:    addps %xmm3, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subps %xmm6, %xmm1
; SSE2-NEXT:    addps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i32_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT:    movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; SSE41-NEXT:    subps %xmm5, %xmm0
; SSE41-NEXT:    addps %xmm3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT:    subps %xmm5, %xmm1
; SSE41-NEXT:    addps %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i32_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i32_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT:    vsubps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_8i32_to_8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_8i32_to_8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_8i32_to_8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512VLDQ-NEXT:    retq
  %cvt = uitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %cvt
}

; uitofp <8 x i16> -> <8 x float>: zero-extend to i32 (two halves on SSE,
; vpmovzxwd on AVX2/AVX-512) and convert with the signed cvtdq2ps.
define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
; SSE2-LABEL: uitofp_8i16_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i16_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    cvtdq2ps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i16_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i16_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: uitofp_8i16_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %cvt
}

; uitofp <8 x i8> -> <8 x float>: zero-extend the low 8 bytes to i32 and
; convert with the signed cvtdq2ps (u8 always fits in a non-negative i32).
define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
; SSE2-LABEL: uitofp_8i8_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_8i8_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i8_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i8_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: uitofp_8i8_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cvt = uitofp <8 x i8> %shuf to <8 x float>
  ret <8 x float> %cvt
}

; uitofp <16 x i8> then keep the low 8 floats: AVX-512 converts all 16 lanes
; in a zmm and truncates; narrower targets only convert the needed low half.
define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
; SSE2-LABEL: uitofp_16i8_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_16i8_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_16i8_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_16i8_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: uitofp_16i8_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vcvtdq2ps %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuf
}

;
; Load Signed Integer to Double
;

; sitofp of a loaded <2 x i64>: scalarized cvtsi2sdq folding the loads as
; memory operands; AVX-512DQ targets use the packed vcvtqq2pd instead.
define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) {
; SSE-LABEL: sitofp_load_2i64_to_2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsi2sdq 8(%rdi), %xmm1
; SSE-NEXT:    cvtsi2sdq (%rdi), %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT:    vcvtsi2sdq (%rdi), %xmm1, %xmm1
; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT:    vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2pd (%rdi), %xmm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <2 x i64>, ptr%a
  %cvt = sitofp <2 x i64> %ld to <2 x double>
  ret <2 x double> %cvt
}

; sitofp of a loaded <2 x i32>: a single cvtdq2pd with a folded memory operand.
define <2 x double> @sitofp_load_2i32_to_2f64(ptr%a) {
; SSE-LABEL: sitofp_load_2i32_to_2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_load_2i32_to_2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
; AVX-NEXT:    retq
  %ld = load <2 x i32>, ptr%a
  %cvt = sitofp <2 x i32> %ld to <2 x double>
  ret <2 x double> %cvt
}

; volatile load must not be folded or narrowed: a full 16-byte movaps is
; emitted before converting the low two lanes.
define <2 x double> @sitofp_volatile_load_4i32_to_2f64(ptr%a) {
; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load volatile <4 x i32>, ptr%a
  %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %b to <2 x double>
  ret <2 x double> %cvt
}

; convert-then-truncate of a non-volatile load shrinks to a single
; cvtdq2pd on the low elements, folding the load.
define <2 x double> @sitofp_load_4i32_to_2f64_2(ptr %x) {
; SSE-LABEL: sitofp_load_4i32_to_2f64_2:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_load_4i32_to_2f64_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
; AVX-NEXT:    retq
  %a = load <4 x i32>, ptr %x
  %b = sitofp <4 x i32> %a to <4 x double>
  %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %c
}

; same as above but the volatile load keeps the full-width movaps; only the
; conversion itself is narrowed to the low two lanes.
define <2 x double> @sitofp_volatile_load_4i32_to_2f64_2(ptr %x) {
; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = load volatile <4 x i32>, ptr %x
  %b = sitofp <4 x i32> %a to <4 x double>
  %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %c
}

; sitofp of a loaded <2 x i16>: a 4-byte movd, sign-extend to i32
; (shuffle+psrad on SSE2, pmovsxwd with SSE4.1+), then cvtdq2pd.
define <2 x double> @sitofp_load_2i16_to_2f64(ptr%a) {
; SSE2-LABEL: sitofp_load_2i16_to_2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_2i16_to_2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_load_2i16_to_2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <2 x i16>, ptr%a
  %cvt = sitofp <2 x i16> %ld to <2 x double>
  ret <2 x double> %cvt
}

; sitofp of a loaded <2 x i8>: a 2-byte movzwl into a GPR, sign-extend the
; bytes to i32 (unpack+psrad on SSE2, pmovsxbd with SSE4.1+), then cvtdq2pd.
define <2 x double> @sitofp_load_2i8_to_2f64(ptr%a) {
; SSE2-LABEL: sitofp_load_2i8_to_2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_2i8_to_2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzwl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_load_2i8_to_2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <2 x i8>, ptr%a
  %cvt = sitofp <2 x i8> %ld to <2 x double>
  ret <2 x double> %cvt
}

; sitofp of a loaded <4 x i64>: four scalarized cvtsi2sdq conversions with
; folded memory operands (unpck/insert to rebuild the vector); AVX-512DQ
; targets use the packed vcvtqq2pd.
define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) {
; SSE-LABEL: sitofp_load_4i64_to_4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsi2sdq 8(%rdi), %xmm1
; SSE-NEXT:    cvtsi2sdq (%rdi), %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    cvtsi2sdq 24(%rdi), %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2sdq 16(%rdi), %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; VEX-NEXT:    vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT:    vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vcvtsi2sdq (%rdi), %xmm2, %xmm2
; VEX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; VEX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT:    vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT:    vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps (%rdi), %ymm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2pd (%rdi), %ymm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <4 x i64>, ptr%a
  %cvt = sitofp <4 x i64> %ld to <4 x double>
  ret <4 x double> %cvt
}

; Load <4 x i32> and sitofp to <4 x double>. SSE converts the low pair with
; cvtdq2pd, shuffles the high pair down, and converts again; AVX folds the
; load into a single ymm vcvtdq2pd.
3061define <4 x double> @sitofp_load_4i32_to_4f64(ptr%a) {
3062; SSE-LABEL: sitofp_load_4i32_to_4f64:
3063; SSE:       # %bb.0:
3064; SSE-NEXT:    movdqa (%rdi), %xmm1
3065; SSE-NEXT:    cvtdq2pd %xmm1, %xmm0
3066; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3067; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
3068; SSE-NEXT:    retq
3069;
3070; AVX-LABEL: sitofp_load_4i32_to_4f64:
3071; AVX:       # %bb.0:
3072; AVX-NEXT:    vcvtdq2pd (%rdi), %ymm0
3073; AVX-NEXT:    retq
3074  %ld = load <4 x i32>, ptr%a
3075  %cvt = sitofp <4 x i32> %ld to <4 x double>
3076  ret <4 x double> %cvt
3077}
3078
; Load <4 x i16> and sitofp to <4 x double>. The words are sign-extended to
; i32 lanes first (SSE2: punpcklwd into the high half then psrad $16;
; SSE4.1/AVX: pmovsxwd), then converted with cvtdq2pd (twice on SSE,
; once as ymm on AVX).
3079define <4 x double> @sitofp_load_4i16_to_4f64(ptr%a) {
3080; SSE2-LABEL: sitofp_load_4i16_to_4f64:
3081; SSE2:       # %bb.0:
3082; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
3083; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3084; SSE2-NEXT:    psrad $16, %xmm1
3085; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
3086; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3087; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
3088; SSE2-NEXT:    retq
3089;
3090; SSE41-LABEL: sitofp_load_4i16_to_4f64:
3091; SSE41:       # %bb.0:
3092; SSE41-NEXT:    pmovsxwd (%rdi), %xmm1
3093; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
3094; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3095; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
3096; SSE41-NEXT:    retq
3097;
3098; AVX-LABEL: sitofp_load_4i16_to_4f64:
3099; AVX:       # %bb.0:
3100; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
3101; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
3102; AVX-NEXT:    retq
3103  %ld = load <4 x i16>, ptr%a
3104  %cvt = sitofp <4 x i16> %ld to <4 x double>
3105  ret <4 x double> %cvt
3106}
3107
; Load <4 x i8> and sitofp to <4 x double>. Same shape as the 4i16 case but
; sign-extending bytes: SSE2 unpacks twice then psrad $24; SSE4.1/AVX use
; pmovsxbd; conversion finishes with cvtdq2pd.
3108define <4 x double> @sitofp_load_4i8_to_4f64(ptr%a) {
3109; SSE2-LABEL: sitofp_load_4i8_to_4f64:
3110; SSE2:       # %bb.0:
3111; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3112; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3113; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3114; SSE2-NEXT:    psrad $24, %xmm1
3115; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
3116; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3117; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
3118; SSE2-NEXT:    retq
3119;
3120; SSE41-LABEL: sitofp_load_4i8_to_4f64:
3121; SSE41:       # %bb.0:
3122; SSE41-NEXT:    pmovsxbd (%rdi), %xmm1
3123; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
3124; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3125; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
3126; SSE41-NEXT:    retq
3127;
3128; AVX-LABEL: sitofp_load_4i8_to_4f64:
3129; AVX:       # %bb.0:
3130; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
3131; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
3132; AVX-NEXT:    retq
3133  %ld = load <4 x i8>, ptr%a
3134  %cvt = sitofp <4 x i8> %ld to <4 x double>
3135  ret <4 x double> %cvt
3136}
3137
3138;
3139; Load Unsigned Integer to Double
3140;
3141
; Load <2 x i64> and uitofp to <2 x double>. Non-DQ targets split each u64
; into its low and high 32-bit halves, OR in magic exponent constants so each
; half becomes an exact double, then combine with subpd+addpd. AVX512DQ has a
; native vcvtuqq2pd (zmm on plain DQ, memory-folded xmm on DQ+VL).
3142define <2 x double> @uitofp_load_2i64_to_2f64(ptr%a) {
3143; SSE2-LABEL: uitofp_load_2i64_to_2f64:
3144; SSE2:       # %bb.0:
3145; SSE2-NEXT:    movdqa (%rdi), %xmm0
3146; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
3147; SSE2-NEXT:    pand %xmm0, %xmm1
3148; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3149; SSE2-NEXT:    psrlq $32, %xmm0
3150; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3151; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3152; SSE2-NEXT:    addpd %xmm1, %xmm0
3153; SSE2-NEXT:    retq
3154;
3155; SSE41-LABEL: uitofp_load_2i64_to_2f64:
3156; SSE41:       # %bb.0:
3157; SSE41-NEXT:    movdqa (%rdi), %xmm0
3158; SSE41-NEXT:    pxor %xmm1, %xmm1
3159; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3160; SSE41-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3161; SSE41-NEXT:    psrlq $32, %xmm0
3162; SSE41-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3163; SSE41-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3164; SSE41-NEXT:    addpd %xmm1, %xmm0
3165; SSE41-NEXT:    retq
3166;
3167; AVX1-LABEL: uitofp_load_2i64_to_2f64:
3168; AVX1:       # %bb.0:
3169; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
3170; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3171; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
3172; AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3173; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
3174; AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3175; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3176; AVX1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
3177; AVX1-NEXT:    retq
3178;
3179; AVX2-LABEL: uitofp_load_2i64_to_2f64:
3180; AVX2:       # %bb.0:
3181; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
3182; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3183; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3184; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3185; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
3186; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3187; AVX2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3188; AVX2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
3189; AVX2-NEXT:    retq
3190;
3191; AVX512F-LABEL: uitofp_load_2i64_to_2f64:
3192; AVX512F:       # %bb.0:
3193; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
3194; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3195; AVX512F-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3196; AVX512F-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
3197; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm0
3198; AVX512F-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3199; AVX512F-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3200; AVX512F-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
3201; AVX512F-NEXT:    retq
3202;
3203; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
3204; AVX512VL:       # %bb.0:
3205; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm0
3206; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3207; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3208; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
3209; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm0
3210; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
3211; AVX512VL-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
3212; AVX512VL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
3213; AVX512VL-NEXT:    retq
3214;
3215; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
3216; AVX512DQ:       # %bb.0:
3217; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
3218; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
3219; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3220; AVX512DQ-NEXT:    vzeroupper
3221; AVX512DQ-NEXT:    retq
3222;
3223; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64:
3224; AVX512VLDQ:       # %bb.0:
3225; AVX512VLDQ-NEXT:    vcvtuqq2pd (%rdi), %xmm0
3226; AVX512VLDQ-NEXT:    retq
3227  %ld = load <2 x i64>, ptr%a
3228  %cvt = uitofp <2 x i64> %ld to <2 x double>
3229  ret <2 x double> %cvt
3230}
3231
; Load <2 x i32> and uitofp to <2 x double>. Non-AVX512 targets zero-extend
; to i64 lanes and use the classic double-magic trick: OR in the
; 4.503599627370496E+15 (2^52) constant, then subtract the same constant.
; AVX512F/DQ widen and use vcvtudq2pd on zmm; the VL variants fold the load
; into an xmm vcvtudq2pd.
3232define <2 x double> @uitofp_load_2i32_to_2f64(ptr%a) {
3233; SSE2-LABEL: uitofp_load_2i32_to_2f64:
3234; SSE2:       # %bb.0:
3235; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
3236; SSE2-NEXT:    xorpd %xmm1, %xmm1
3237; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3238; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3239; SSE2-NEXT:    orpd %xmm1, %xmm0
3240; SSE2-NEXT:    subpd %xmm1, %xmm0
3241; SSE2-NEXT:    retq
3242;
3243; SSE41-LABEL: uitofp_load_2i32_to_2f64:
3244; SSE41:       # %bb.0:
3245; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3246; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3247; SSE41-NEXT:    por %xmm1, %xmm0
3248; SSE41-NEXT:    subpd %xmm1, %xmm0
3249; SSE41-NEXT:    retq
3250;
3251; AVX1-LABEL: uitofp_load_2i32_to_2f64:
3252; AVX1:       # %bb.0:
3253; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3254; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3255; AVX1-NEXT:    # xmm1 = mem[0,0]
3256; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
3257; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3258; AVX1-NEXT:    retq
3259;
3260; AVX2-LABEL: uitofp_load_2i32_to_2f64:
3261; AVX2:       # %bb.0:
3262; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3263; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3264; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
3265; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3266; AVX2-NEXT:    retq
3267;
3268; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
3269; AVX512F:       # %bb.0:
3270; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3271; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
3272; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3273; AVX512F-NEXT:    vzeroupper
3274; AVX512F-NEXT:    retq
3275;
3276; AVX512VL-LABEL: uitofp_load_2i32_to_2f64:
3277; AVX512VL:       # %bb.0:
3278; AVX512VL-NEXT:    vcvtudq2pd (%rdi), %xmm0
3279; AVX512VL-NEXT:    retq
3280;
3281; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64:
3282; AVX512DQ:       # %bb.0:
3283; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3284; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
3285; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3286; AVX512DQ-NEXT:    vzeroupper
3287; AVX512DQ-NEXT:    retq
3288;
3289; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64:
3290; AVX512VLDQ:       # %bb.0:
3291; AVX512VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0
3292; AVX512VLDQ-NEXT:    retq
3293  %ld = load <2 x i32>, ptr%a
3294  %cvt = uitofp <2 x i32> %ld to <2 x double>
3295  ret <2 x double> %cvt
3296}
3297
; uitofp a loaded <4 x i32> to <4 x double> but only keep the low two results
; via shufflevector: checks that the conversion is narrowed to the two used
; lanes (e.g. SSE41/AVX1 only zero-extend two elements) rather than computing
; all four.
3298define <2 x double> @uitofp_load_4i32_to_2f64_2(ptr %x) {
3299; SSE2-LABEL: uitofp_load_4i32_to_2f64_2:
3300; SSE2:       # %bb.0:
3301; SSE2-NEXT:    movapd (%rdi), %xmm0
3302; SSE2-NEXT:    xorpd %xmm1, %xmm1
3303; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3304; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3305; SSE2-NEXT:    orpd %xmm1, %xmm0
3306; SSE2-NEXT:    subpd %xmm1, %xmm0
3307; SSE2-NEXT:    retq
3308;
3309; SSE41-LABEL: uitofp_load_4i32_to_2f64_2:
3310; SSE41:       # %bb.0:
3311; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3312; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3313; SSE41-NEXT:    por %xmm1, %xmm0
3314; SSE41-NEXT:    subpd %xmm1, %xmm0
3315; SSE41-NEXT:    retq
3316;
3317; AVX1-LABEL: uitofp_load_4i32_to_2f64_2:
3318; AVX1:       # %bb.0:
3319; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3320; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3321; AVX1-NEXT:    # xmm1 = mem[0,0]
3322; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
3323; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3324; AVX1-NEXT:    retq
3325;
3326; AVX2-LABEL: uitofp_load_4i32_to_2f64_2:
3327; AVX2:       # %bb.0:
3328; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3329; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3330; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
3331; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3332; AVX2-NEXT:    vzeroupper
3333; AVX2-NEXT:    retq
3334;
3335; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2:
3336; AVX512F:       # %bb.0:
3337; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
3338; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
3339; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3340; AVX512F-NEXT:    vzeroupper
3341; AVX512F-NEXT:    retq
3342;
3343; AVX512VL-LABEL: uitofp_load_4i32_to_2f64_2:
3344; AVX512VL:       # %bb.0:
3345; AVX512VL-NEXT:    vcvtudq2pd (%rdi), %xmm0
3346; AVX512VL-NEXT:    retq
3347;
3348; AVX512DQ-LABEL: uitofp_load_4i32_to_2f64_2:
3349; AVX512DQ:       # %bb.0:
3350; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
3351; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
3352; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3353; AVX512DQ-NEXT:    vzeroupper
3354; AVX512DQ-NEXT:    retq
3355;
3356; AVX512VLDQ-LABEL: uitofp_load_4i32_to_2f64_2:
3357; AVX512VLDQ:       # %bb.0:
3358; AVX512VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0
3359; AVX512VLDQ-NEXT:    retq
3360  %a = load <4 x i32>, ptr %x
3361  %b = uitofp <4 x i32> %a to <4 x double>
3362  %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
3363  ret <2 x double> %c
3364}
3365
; Volatile variant of uitofp_load_4i32_to_2f64_2. The AVX512*VL variants keep
; an explicit full-width vmovaps of the <4 x i32> instead of folding the load
; into vcvtudq2pd, since the volatile load must not be combined away.
; NOTE(review): SSE41/AVX1 still show a narrow pmovzxdq from memory here —
; appears intentional per the autogenerated checks; confirm when regenerating.
3366define <2 x double> @uitofp_volatile_load_4i32_to_2f64_2(ptr %x) {
3367; SSE2-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3368; SSE2:       # %bb.0:
3369; SSE2-NEXT:    movapd (%rdi), %xmm0
3370; SSE2-NEXT:    xorpd %xmm1, %xmm1
3371; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3372; SSE2-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3373; SSE2-NEXT:    orpd %xmm1, %xmm0
3374; SSE2-NEXT:    subpd %xmm1, %xmm0
3375; SSE2-NEXT:    retq
3376;
3377; SSE41-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3378; SSE41:       # %bb.0:
3379; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3380; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3381; SSE41-NEXT:    por %xmm1, %xmm0
3382; SSE41-NEXT:    subpd %xmm1, %xmm0
3383; SSE41-NEXT:    retq
3384;
3385; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3386; AVX1:       # %bb.0:
3387; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3388; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3389; AVX1-NEXT:    # xmm1 = mem[0,0]
3390; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
3391; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3392; AVX1-NEXT:    retq
3393;
3394; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3395; AVX2:       # %bb.0:
3396; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3397; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
3398; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
3399; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
3400; AVX2-NEXT:    vzeroupper
3401; AVX2-NEXT:    retq
3402;
3403; AVX512F-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3404; AVX512F:       # %bb.0:
3405; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
3406; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
3407; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3408; AVX512F-NEXT:    vzeroupper
3409; AVX512F-NEXT:    retq
3410;
3411; AVX512VL-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3412; AVX512VL:       # %bb.0:
3413; AVX512VL-NEXT:    vmovaps (%rdi), %xmm0
3414; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
3415; AVX512VL-NEXT:    retq
3416;
3417; AVX512DQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3418; AVX512DQ:       # %bb.0:
3419; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
3420; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
3421; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3422; AVX512DQ-NEXT:    vzeroupper
3423; AVX512DQ-NEXT:    retq
3424;
3425; AVX512VLDQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2:
3426; AVX512VLDQ:       # %bb.0:
3427; AVX512VLDQ-NEXT:    vmovaps (%rdi), %xmm0
3428; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0
3429; AVX512VLDQ-NEXT:    retq
3430  %a = load volatile <4 x i32>, ptr %x
3431  %b = uitofp <4 x i32> %a to <4 x double>
3432  %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1>
3433  ret <2 x double> %c
3434}
3435
; Load <2 x i16> and uitofp to <2 x double>. u16 fits exactly in i32, so the
; words are simply zero-extended (punpcklwd with zero on SSE2, pmovzxwd on
; SSE4.1/AVX) and converted with the signed cvtdq2pd.
3436define <2 x double> @uitofp_load_2i16_to_2f64(ptr%a) {
3437; SSE2-LABEL: uitofp_load_2i16_to_2f64:
3438; SSE2:       # %bb.0:
3439; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3440; SSE2-NEXT:    pxor %xmm1, %xmm1
3441; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3442; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
3443; SSE2-NEXT:    retq
3444;
3445; SSE41-LABEL: uitofp_load_2i16_to_2f64:
3446; SSE41:       # %bb.0:
3447; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3448; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3449; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
3450; SSE41-NEXT:    retq
3451;
3452; AVX-LABEL: uitofp_load_2i16_to_2f64:
3453; AVX:       # %bb.0:
3454; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3455; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3456; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
3457; AVX-NEXT:    retq
3458  %ld = load <2 x i16>, ptr%a
3459  %cvt = uitofp <2 x i16> %ld to <2 x double>
3460  ret <2 x double> %cvt
3461}
3462
; Load <2 x i8> and uitofp to <2 x double>. The byte pair is loaded with one
; movzwl, zero-extended to i32 lanes (two punpck steps on SSE2, pmovzxbd on
; SSE4.1/AVX), then converted with the signed cvtdq2pd — safe because the
; zero-extended values are non-negative.
3463define <2 x double> @uitofp_load_2i8_to_2f64(ptr%a) {
3464; SSE2-LABEL: uitofp_load_2i8_to_2f64:
3465; SSE2:       # %bb.0:
3466; SSE2-NEXT:    movzwl (%rdi), %eax
3467; SSE2-NEXT:    movd %eax, %xmm0
3468; SSE2-NEXT:    pxor %xmm1, %xmm1
3469; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3470; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3471; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
3472; SSE2-NEXT:    retq
3473;
3474; SSE41-LABEL: uitofp_load_2i8_to_2f64:
3475; SSE41:       # %bb.0:
3476; SSE41-NEXT:    movzwl (%rdi), %eax
3477; SSE41-NEXT:    movd %eax, %xmm0
3478; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3479; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
3480; SSE41-NEXT:    retq
3481;
3482; AVX-LABEL: uitofp_load_2i8_to_2f64:
3483; AVX:       # %bb.0:
3484; AVX-NEXT:    movzwl (%rdi), %eax
3485; AVX-NEXT:    vmovd %eax, %xmm0
3486; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3487; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
3488; AVX-NEXT:    retq
3489  %ld = load <2 x i8>, ptr%a
3490  %cvt = uitofp <2 x i8> %ld to <2 x double>
3491  ret <2 x double> %cvt
3492}
3493
; Load <4 x i64> and uitofp to <4 x double>. Same hi/lo 32-bit split with
; magic exponent constants (4841369599423283200 / 4985484787499139072, i.e.
; biased doubles) and subpd+addpd as the 2i64 case, applied per 128-bit half
; on SSE and on a full ymm for AVX1/AVX2/AVX512F. AVX512DQ uses vcvtuqq2pd
; (zmm on plain DQ, memory-folded ymm on DQ+VL).
3494define <4 x double> @uitofp_load_4i64_to_4f64(ptr%a) {
3495; SSE2-LABEL: uitofp_load_4i64_to_4f64:
3496; SSE2:       # %bb.0:
3497; SSE2-NEXT:    movdqa (%rdi), %xmm0
3498; SSE2-NEXT:    movdqa 16(%rdi), %xmm1
3499; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
3500; SSE2-NEXT:    movdqa %xmm0, %xmm3
3501; SSE2-NEXT:    pand %xmm2, %xmm3
3502; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
3503; SSE2-NEXT:    por %xmm4, %xmm3
3504; SSE2-NEXT:    psrlq $32, %xmm0
3505; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
3506; SSE2-NEXT:    por %xmm5, %xmm0
3507; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
3508; SSE2-NEXT:    subpd %xmm6, %xmm0
3509; SSE2-NEXT:    addpd %xmm3, %xmm0
3510; SSE2-NEXT:    pand %xmm1, %xmm2
3511; SSE2-NEXT:    por %xmm4, %xmm2
3512; SSE2-NEXT:    psrlq $32, %xmm1
3513; SSE2-NEXT:    por %xmm5, %xmm1
3514; SSE2-NEXT:    subpd %xmm6, %xmm1
3515; SSE2-NEXT:    addpd %xmm2, %xmm1
3516; SSE2-NEXT:    retq
3517;
3518; SSE41-LABEL: uitofp_load_4i64_to_4f64:
3519; SSE41:       # %bb.0:
3520; SSE41-NEXT:    movdqa (%rdi), %xmm0
3521; SSE41-NEXT:    movdqa 16(%rdi), %xmm1
3522; SSE41-NEXT:    pxor %xmm2, %xmm2
3523; SSE41-NEXT:    movdqa %xmm0, %xmm3
3524; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
3525; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
3526; SSE41-NEXT:    por %xmm4, %xmm3
3527; SSE41-NEXT:    psrlq $32, %xmm0
3528; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
3529; SSE41-NEXT:    por %xmm5, %xmm0
3530; SSE41-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
3531; SSE41-NEXT:    subpd %xmm6, %xmm0
3532; SSE41-NEXT:    addpd %xmm3, %xmm0
3533; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3534; SSE41-NEXT:    por %xmm4, %xmm2
3535; SSE41-NEXT:    psrlq $32, %xmm1
3536; SSE41-NEXT:    por %xmm5, %xmm1
3537; SSE41-NEXT:    subpd %xmm6, %xmm1
3538; SSE41-NEXT:    addpd %xmm2, %xmm1
3539; SSE41-NEXT:    retq
3540;
3541; AVX1-LABEL: uitofp_load_4i64_to_4f64:
3542; AVX1:       # %bb.0:
3543; AVX1-NEXT:    vmovaps (%rdi), %ymm0
3544; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
3545; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3546; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
3547; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
3548; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
3549; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3550; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3551; AVX1-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
3552; AVX1-NEXT:    retq
3553;
3554; AVX2-LABEL: uitofp_load_4i64_to_4f64:
3555; AVX2:       # %bb.0:
3556; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
3557; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3558; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3559; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
3560; AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
3561; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
3562; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
3563; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
3564; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
3565; AVX2-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
3566; AVX2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
3567; AVX2-NEXT:    retq
3568;
3569; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
3570; AVX512F:       # %bb.0:
3571; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
3572; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3573; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3574; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
3575; AVX512F-NEXT:    vpor %ymm2, %ymm1, %ymm1
3576; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm0
3577; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
3578; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
3579; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
3580; AVX512F-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
3581; AVX512F-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
3582; AVX512F-NEXT:    retq
3583;
3584; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
3585; AVX512VL:       # %bb.0:
3586; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
3587; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3588; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
3589; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
3590; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
3591; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
3592; AVX512VL-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
3593; AVX512VL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
3594; AVX512VL-NEXT:    retq
3595;
3596; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64:
3597; AVX512DQ:       # %bb.0:
3598; AVX512DQ-NEXT:    vmovaps (%rdi), %ymm0
3599; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
3600; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
3601; AVX512DQ-NEXT:    retq
3602;
3603; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64:
3604; AVX512VLDQ:       # %bb.0:
3605; AVX512VLDQ-NEXT:    vcvtuqq2pd (%rdi), %ymm0
3606; AVX512VLDQ-NEXT:    retq
3607  %ld = load <4 x i64>, ptr%a
3608  %cvt = uitofp <4 x i64> %ld to <4 x double>
3609  ret <4 x double> %cvt
3610}
3611
; Load <4 x i32> and uitofp to <4 x double>. Non-AVX512 targets zero-extend
; each dword to a qword lane and apply the 2^52 or/sub magic-number trick,
; per 128-bit half on SSE and on a ymm for AVX1/AVX2. AVX512 targets use the
; native vcvtudq2pd (memory-folded ymm on the VL variants).
3612define <4 x double> @uitofp_load_4i32_to_4f64(ptr%a) {
3613; SSE2-LABEL: uitofp_load_4i32_to_4f64:
3614; SSE2:       # %bb.0:
3615; SSE2-NEXT:    movapd (%rdi), %xmm1
3616; SSE2-NEXT:    xorpd %xmm2, %xmm2
3617; SSE2-NEXT:    movapd %xmm1, %xmm0
3618; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3619; SSE2-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
3620; SSE2-NEXT:    orpd %xmm3, %xmm0
3621; SSE2-NEXT:    subpd %xmm3, %xmm0
3622; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3623; SSE2-NEXT:    orpd %xmm3, %xmm1
3624; SSE2-NEXT:    subpd %xmm3, %xmm1
3625; SSE2-NEXT:    retq
3626;
3627; SSE41-LABEL: uitofp_load_4i32_to_4f64:
3628; SSE41:       # %bb.0:
3629; SSE41-NEXT:    movdqa (%rdi), %xmm1
3630; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
3631; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
3632; SSE41-NEXT:    por %xmm2, %xmm0
3633; SSE41-NEXT:    subpd %xmm2, %xmm0
3634; SSE41-NEXT:    pxor %xmm3, %xmm3
3635; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3636; SSE41-NEXT:    por %xmm2, %xmm1
3637; SSE41-NEXT:    subpd %xmm2, %xmm1
3638; SSE41-NEXT:    retq
3639;
3640; AVX1-LABEL: uitofp_load_4i32_to_4f64:
3641; AVX1:       # %bb.0:
3642; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
3643; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
3644; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3645; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
3646; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3647; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
3648; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
3649; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
3650; AVX1-NEXT:    retq
3651;
3652; AVX2-LABEL: uitofp_load_4i32_to_4f64:
3653; AVX2:       # %bb.0:
3654; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3655; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
3656; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
3657; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
3658; AVX2-NEXT:    retq
3659;
3660; AVX512F-LABEL: uitofp_load_4i32_to_4f64:
3661; AVX512F:       # %bb.0:
3662; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
3663; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
3664; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
3665; AVX512F-NEXT:    retq
3666;
3667; AVX512VL-LABEL: uitofp_load_4i32_to_4f64:
3668; AVX512VL:       # %bb.0:
3669; AVX512VL-NEXT:    vcvtudq2pd (%rdi), %ymm0
3670; AVX512VL-NEXT:    retq
3671;
3672; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64:
3673; AVX512DQ:       # %bb.0:
3674; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
3675; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
3676; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
3677; AVX512DQ-NEXT:    retq
3678;
3679; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64:
3680; AVX512VLDQ:       # %bb.0:
3681; AVX512VLDQ-NEXT:    vcvtudq2pd (%rdi), %ymm0
3682; AVX512VLDQ-NEXT:    retq
3683  %ld = load <4 x i32>, ptr%a
3684  %cvt = uitofp <4 x i32> %ld to <4 x double>
3685  ret <4 x double> %cvt
3686}
3687
; Load <4 x i16> and uitofp to <4 x double>. Words are zero-extended to i32
; lanes (punpcklwd with zero on SSE2, pmovzxwd on SSE4.1/AVX) and converted
; with cvtdq2pd — two xmm conversions on SSE, one ymm conversion on AVX.
3688define <4 x double> @uitofp_load_4i16_to_4f64(ptr%a) {
3689; SSE2-LABEL: uitofp_load_4i16_to_4f64:
3690; SSE2:       # %bb.0:
3691; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
3692; SSE2-NEXT:    pxor %xmm0, %xmm0
3693; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3694; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
3695; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3696; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
3697; SSE2-NEXT:    retq
3698;
3699; SSE41-LABEL: uitofp_load_4i16_to_4f64:
3700; SSE41:       # %bb.0:
3701; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3702; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
3703; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3704; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
3705; SSE41-NEXT:    retq
3706;
3707; AVX-LABEL: uitofp_load_4i16_to_4f64:
3708; AVX:       # %bb.0:
3709; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3710; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
3711; AVX-NEXT:    retq
3712  %ld = load <4 x i16>, ptr%a
3713  %cvt = uitofp <4 x i16> %ld to <4 x double>
3714  ret <4 x double> %cvt
3715}
3716
; Load <4 x i8> and uitofp to <4 x double>. Bytes are zero-extended to i32
; lanes (two punpck steps on SSE2, pmovzxbd on SSE4.1/AVX) and converted
; with the signed cvtdq2pd, which is exact for the non-negative values.
3717define <4 x double> @uitofp_load_4i8_to_4f64(ptr%a) {
3718; SSE2-LABEL: uitofp_load_4i8_to_4f64:
3719; SSE2:       # %bb.0:
3720; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3721; SSE2-NEXT:    pxor %xmm0, %xmm0
3722; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3723; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3724; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm0
3725; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3726; SSE2-NEXT:    cvtdq2pd %xmm1, %xmm1
3727; SSE2-NEXT:    retq
3728;
3729; SSE41-LABEL: uitofp_load_4i8_to_4f64:
3730; SSE41:       # %bb.0:
3731; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3732; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm0
3733; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3734; SSE41-NEXT:    cvtdq2pd %xmm1, %xmm1
3735; SSE41-NEXT:    retq
3736;
3737; AVX-LABEL: uitofp_load_4i8_to_4f64:
3738; AVX:       # %bb.0:
3739; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3740; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
3741; AVX-NEXT:    retq
3742  %ld = load <4 x i8>, ptr%a
3743  %cvt = uitofp <4 x i8> %ld to <4 x double>
3744  ret <4 x double> %cvt
3745}
3746
3747;
3748; Load Signed Integer to Float
3749;
3750
; Without AVX512DQ there is no packed i64 -> f32 instruction, so each element
; is converted with scalar cvtsi2ssq (folding the load into the conversion)
; and the results are reassembled with unpcklps/movlhps or insertps.
; DQ targets get a single vcvtqq2ps (widened to zmm without VL).
define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) {
; SSE2-LABEL: sitofp_load_4i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvtsi2ssq 24(%rdi), %xmm0
; SSE2-NEXT:    cvtsi2ssq 16(%rdi), %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    cvtsi2ssq 8(%rdi), %xmm2
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_4i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    cvtsi2ssq 8(%rdi), %xmm1
; SSE41-NEXT:    cvtsi2ssq (%rdi), %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ssq 16(%rdi), %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ssq 24(%rdi), %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT:    retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT:    vcvtsi2ssq (%rdi), %xmm1, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT:    vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT:    vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT:    vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT:    vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT:    vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT:    vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT:    vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps (%rdi), %ymm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2psy (%rdi), %xmm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <4 x i64>, ptr%a
  %cvt = sitofp <4 x i64> %ld to <4 x float>
  ret <4 x float> %cvt
}
3826
; The simplest case: a loaded <4 x i32> converts directly with a single
; memory-operand (v)cvtdq2ps on all targets.
define <4 x float> @sitofp_load_4i32_to_4f32(ptr%a) {
; SSE-LABEL: sitofp_load_4i32_to_4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_load_4i32_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps (%rdi), %xmm0
; AVX-NEXT:    retq
  %ld = load <4 x i32>, ptr%a
  %cvt = sitofp <4 x i32> %ld to <4 x float>
  ret <4 x float> %cvt
}
3841
; Signed i16 elements are widened to i32 before the conversion: SSE2 uses the
; unpack-then-arithmetic-shift (psrad $16) trick, SSE4.1/AVX load with a
; sign-extending pmovsxwd; the convert itself is always cvtdq2ps.
define <4 x float> @sitofp_load_4i16_to_4f32(ptr%a) {
; SSE2-LABEL: sitofp_load_4i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_4i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_load_4i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <4 x i16>, ptr%a
  %cvt = sitofp <4 x i16> %ld to <4 x float>
  ret <4 x float> %cvt
}
3866
; Same pattern as the i16 case but for signed i8: SSE2 unpacks twice and
; sign-extends with psrad $24, SSE4.1/AVX use a single pmovsxbd load, then
; cvtdq2ps performs the actual int -> float conversion.
define <4 x float> @sitofp_load_4i8_to_4f32(ptr%a) {
; SSE2-LABEL: sitofp_load_4i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_4i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sitofp_load_4i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <4 x i8>, ptr%a
  %cvt = sitofp <4 x i8> %ld to <4 x float>
  ret <4 x float> %cvt
}
3892
; Eight i64 -> f32 conversions: pre-DQ targets scalarize into eight
; load-folded cvtsi2ssq conversions and rebuild the two halves with
; unpcklps/insertps (plus vinsertf128 on AVX); with AVX512DQ the whole
; operation collapses to one vcvtqq2ps from memory.
define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) {
; SSE2-LABEL: sitofp_load_8i64_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvtsi2ssq 24(%rdi), %xmm0
; SSE2-NEXT:    cvtsi2ssq 16(%rdi), %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    cvtsi2ssq 8(%rdi), %xmm2
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ssq 56(%rdi), %xmm1
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    cvtsi2ssq 48(%rdi), %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    cvtsi2ssq 40(%rdi), %xmm3
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2ssq 32(%rdi), %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_8i64_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    cvtsi2ssq 8(%rdi), %xmm1
; SSE41-NEXT:    cvtsi2ssq (%rdi), %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ssq 16(%rdi), %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ssq 24(%rdi), %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT:    cvtsi2ssq 40(%rdi), %xmm2
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    cvtsi2ssq 32(%rdi), %xmm1
; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ssq 48(%rdi), %xmm2
; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    cvtsi2ssq 56(%rdi), %xmm2
; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; SSE41-NEXT:    retq
;
; VEX-LABEL: sitofp_load_8i64_to_8f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; VEX-NEXT:    vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT:    vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT:    vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT:    vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT:    vcvtsi2ssq (%rdi), %xmm2, %xmm2
; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; VEX-NEXT:    vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; VEX-NEXT:    vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; VEX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT:    retq
;
; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT:    vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT:    vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT:    vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT:    vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT:    vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT:    vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT:    vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT:    vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT:    vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvtqq2ps (%rdi), %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtqq2ps (%rdi), %ymm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <8 x i64>, ptr%a
  %cvt = sitofp <8 x i64> %ld to <8 x float>
  ret <8 x float> %cvt
}
4009
; <8 x i32> conversion splits into two 128-bit cvtdq2ps on SSE targets and a
; single 256-bit load-folded vcvtdq2ps on AVX and later.
define <8 x float> @sitofp_load_8i32_to_8f32(ptr%a) {
; SSE-LABEL: sitofp_load_8i32_to_8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps (%rdi), %xmm0
; SSE-NEXT:    cvtdq2ps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_load_8i32_to_8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps (%rdi), %ymm0
; AVX-NEXT:    retq
  %ld = load <8 x i32>, ptr%a
  %cvt = sitofp <8 x i32> %ld to <8 x float>
  ret <8 x float> %cvt
}
4025
; Eight signed i16 elements: SSE2 sign-extends both halves with
; punpck[lh]wd + psrad $16, SSE4.1 uses two pmovsxwd loads, AVX1 widens each
; half and joins with vinsertf128, and AVX2/AVX512 use a single 256-bit
; vpmovsxwd before the vcvtdq2ps.
define <8 x float> @sitofp_load_8i16_to_8f32(ptr%a) {
; SSE2-LABEL: sitofp_load_8i16_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_8i16_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxwd 8(%rdi), %xmm1
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_load_8i16_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm0
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_load_8i16_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sitofp_load_8i16_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwd (%rdi), %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %ld = load <8 x i16>, ptr%a
  %cvt = sitofp <8 x i16> %ld to <8 x float>
  ret <8 x float> %cvt
}
4069
; Same structure as the 8 x i16 test but sign-extending from i8
; (psrad $24 on SSE2, pmovsxbd loads on SSE4.1/AVX1, 256-bit vpmovsxbd on
; AVX2/AVX512) before the cvtdq2ps.
define <8 x float> @sitofp_load_8i8_to_8f32(ptr%a) {
; SSE2-LABEL: sitofp_load_8i8_to_8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $24, %xmm1
; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sitofp_load_8i8_to_8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbd 4(%rdi), %xmm1
; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sitofp_load_8i8_to_8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm0
; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_load_8i8_to_8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd (%rdi), %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sitofp_load_8i8_to_8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd (%rdi), %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %ld = load <8 x i8>, ptr%a
  %cvt = sitofp <8 x i8> %ld to <8 x float>
  ret <8 x float> %cvt
}
4114
4115;
4116; Load Unsigned Integer to Float
4117;
4118
; Unsigned i64 -> f32 has no direct pre-AVX512 instruction. SSE2 scalarizes:
; for each element it tests the sign bit and, when set, uses the classic
; shift-right-by-one / OR-in-low-bit / convert / double trick to keep correct
; rounding for values >= 2^63. SSE4.1/AVX vectorize the same halving trick
; with psrlq/pand/por + blendv, then double the lanes that needed it.
; AVX512F/VL use scalar vcvtusi2ssq directly; DQ targets use packed vcvtuqq2ps.
define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
; SSE2-LABEL: uitofp_load_4i64_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq 24(%rdi), %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB83_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB83_3
; SSE2-NEXT:  .LBB83_1:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB83_3:
; SSE2-NEXT:    movq 16(%rdi), %rax
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB83_4
; SSE2-NEXT:  # %bb.5:
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    jmp .LBB83_6
; SSE2-NEXT:  .LBB83_4:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
; SSE2-NEXT:    addss %xmm1, %xmm1
; SSE2-NEXT:  .LBB83_6:
; SSE2-NEXT:    movq (%rdi), %rax
; SSE2-NEXT:    movq 8(%rdi), %rcx
; SSE2-NEXT:    testq %rcx, %rcx
; SSE2-NEXT:    js .LBB83_7
; SSE2-NEXT:  # %bb.8:
; SSE2-NEXT:    cvtsi2ss %rcx, %xmm2
; SSE2-NEXT:    jmp .LBB83_9
; SSE2-NEXT:  .LBB83_7:
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    shrq %rdx
; SSE2-NEXT:    andl $1, %ecx
; SSE2-NEXT:    orq %rdx, %rcx
; SSE2-NEXT:    cvtsi2ss %rcx, %xmm2
; SSE2-NEXT:    addss %xmm2, %xmm2
; SSE2-NEXT:  .LBB83_9:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    testq %rax, %rax
; SSE2-NEXT:    js .LBB83_10
; SSE2-NEXT:  # %bb.11:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    jmp .LBB83_12
; SSE2-NEXT:  .LBB83_10:
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    andl $1, %eax
; SSE2-NEXT:    orq %rcx, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB83_12:
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_load_4i64_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa (%rdi), %xmm1
; SSE41-NEXT:    movdqa 16(%rdi), %xmm2
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm4 = [1,1]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    psrlq $1, %xmm3
; SSE41-NEXT:    por %xmm0, %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm5
; SSE41-NEXT:    pextrq $1, %xmm5, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    movq %xmm5, %rax
; SSE41-NEXT:    xorps %xmm3, %xmm3
; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
; SSE41-NEXT:    pand %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm2, %xmm5
; SSE41-NEXT:    psrlq $1, %xmm5
; SSE41-NEXT:    por %xmm4, %xmm5
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
; SSE41-NEXT:    movq %xmm2, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
; SSE41-NEXT:    pextrq $1, %xmm2, %rax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
; SSE41-NEXT:    movaps %xmm3, %xmm2
; SSE41-NEXT:    addps %xmm3, %xmm2
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm2
; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_load_4i64_to_4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlq $1, %ymm0, %ymm2
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT:    vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT:    vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT:    vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT:    vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT:    vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps (%rdi), %ymm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtuqq2psy (%rdi), %xmm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <4 x i64>, ptr%a
  %cvt = uitofp <4 x i64> %ld to <4 x float>
  ret <4 x float> %cvt
}
4319
; Unsigned i32 -> f32 on pre-AVX512 targets uses the magic-constant trick:
; low and high 16-bit halves are merged (pand/por or pblendw) with float
; exponent bit patterns, the biased high part is corrected with subps, and
; the two halves are summed with addps. AVX512 has vcvtudq2ps natively
; (widened to zmm without VL).
define <4 x float> @uitofp_load_4i32_to_4f32(ptr%a) {
; SSE2-LABEL: uitofp_load_4i32_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    addps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_load_4i32_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa (%rdi), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; SSE41-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    addps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: uitofp_load_4i32_to_4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_load_4i32_to_4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT:    vsubps %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: uitofp_load_4i32_to_4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: uitofp_load_4i32_to_4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps (%rdi), %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvtudq2ps (%rdi), %xmm0
; AVX512VLDQ-NEXT:    retq
  %ld = load <4 x i32>, ptr%a
  %cvt = uitofp <4 x i32> %ld to <4 x float>
  ret <4 x float> %cvt
}
4396
; Unsigned i16 fits in signed i32, so a zero-extend (punpcklwd with zero on
; SSE2, pmovzxwd otherwise) plus the signed cvtdq2ps is sufficient — no
; unsigned fixup path needed.
define <4 x float> @uitofp_load_4i16_to_4f32(ptr%a) {
; SSE2-LABEL: uitofp_load_4i16_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_load_4i16_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_load_4i16_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <4 x i16>, ptr%a
  %cvt = uitofp <4 x i16> %ld to <4 x float>
  ret <4 x float> %cvt
}
4421
; Unsigned i8 also fits in signed i32: zero-extend (two unpacks with zero on
; SSE2, a single pmovzxbd load otherwise) followed by the signed cvtdq2ps.
define <4 x float> @uitofp_load_4i8_to_4f32(ptr%a) {
; SSE2-LABEL: uitofp_load_4i8_to_4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uitofp_load_4i8_to_4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uitofp_load_4i8_to_4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load <4 x i8>, ptr%a
  %cvt = uitofp <4 x i8> %ld to <4 x float>
  ret <4 x float> %cvt
}
4447
4448define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
4449; SSE2-LABEL: uitofp_load_8i64_to_8f32:
4450; SSE2:       # %bb.0:
4451; SSE2-NEXT:    movq 24(%rdi), %rax
4452; SSE2-NEXT:    testq %rax, %rax
4453; SSE2-NEXT:    js .LBB87_1
4454; SSE2-NEXT:  # %bb.2:
4455; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
4456; SSE2-NEXT:    jmp .LBB87_3
4457; SSE2-NEXT:  .LBB87_1:
4458; SSE2-NEXT:    movq %rax, %rcx
4459; SSE2-NEXT:    shrq %rcx
4460; SSE2-NEXT:    andl $1, %eax
4461; SSE2-NEXT:    orq %rcx, %rax
4462; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
4463; SSE2-NEXT:    addss %xmm2, %xmm2
4464; SSE2-NEXT:  .LBB87_3:
4465; SSE2-NEXT:    movq 16(%rdi), %rax
4466; SSE2-NEXT:    testq %rax, %rax
4467; SSE2-NEXT:    js .LBB87_4
4468; SSE2-NEXT:  # %bb.5:
4469; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
4470; SSE2-NEXT:    jmp .LBB87_6
4471; SSE2-NEXT:  .LBB87_4:
4472; SSE2-NEXT:    movq %rax, %rcx
4473; SSE2-NEXT:    shrq %rcx
4474; SSE2-NEXT:    andl $1, %eax
4475; SSE2-NEXT:    orq %rcx, %rax
4476; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
4477; SSE2-NEXT:    addss %xmm1, %xmm1
4478; SSE2-NEXT:  .LBB87_6:
4479; SSE2-NEXT:    movq (%rdi), %rax
4480; SSE2-NEXT:    movq 8(%rdi), %rcx
4481; SSE2-NEXT:    testq %rcx, %rcx
4482; SSE2-NEXT:    js .LBB87_7
4483; SSE2-NEXT:  # %bb.8:
4484; SSE2-NEXT:    cvtsi2ss %rcx, %xmm3
4485; SSE2-NEXT:    testq %rax, %rax
4486; SSE2-NEXT:    jns .LBB87_11
4487; SSE2-NEXT:  .LBB87_10:
4488; SSE2-NEXT:    movq %rax, %rcx
4489; SSE2-NEXT:    shrq %rcx
4490; SSE2-NEXT:    andl $1, %eax
4491; SSE2-NEXT:    orq %rcx, %rax
4492; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
4493; SSE2-NEXT:    addss %xmm0, %xmm0
4494; SSE2-NEXT:    jmp .LBB87_12
4495; SSE2-NEXT:  .LBB87_7:
4496; SSE2-NEXT:    movq %rcx, %rdx
4497; SSE2-NEXT:    shrq %rdx
4498; SSE2-NEXT:    andl $1, %ecx
4499; SSE2-NEXT:    orq %rdx, %rcx
4500; SSE2-NEXT:    cvtsi2ss %rcx, %xmm3
4501; SSE2-NEXT:    addss %xmm3, %xmm3
4502; SSE2-NEXT:    testq %rax, %rax
4503; SSE2-NEXT:    js .LBB87_10
4504; SSE2-NEXT:  .LBB87_11:
4505; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
4506; SSE2-NEXT:  .LBB87_12:
4507; SSE2-NEXT:    movq 56(%rdi), %rax
4508; SSE2-NEXT:    testq %rax, %rax
4509; SSE2-NEXT:    js .LBB87_13
4510; SSE2-NEXT:  # %bb.14:
4511; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
4512; SSE2-NEXT:    jmp .LBB87_15
4513; SSE2-NEXT:  .LBB87_13:
4514; SSE2-NEXT:    movq %rax, %rcx
4515; SSE2-NEXT:    shrq %rcx
4516; SSE2-NEXT:    andl $1, %eax
4517; SSE2-NEXT:    orq %rcx, %rax
4518; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
4519; SSE2-NEXT:    addss %xmm5, %xmm5
4520; SSE2-NEXT:  .LBB87_15:
4521; SSE2-NEXT:    movq 48(%rdi), %rax
4522; SSE2-NEXT:    testq %rax, %rax
4523; SSE2-NEXT:    js .LBB87_16
4524; SSE2-NEXT:  # %bb.17:
4525; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
4526; SSE2-NEXT:    jmp .LBB87_18
4527; SSE2-NEXT:  .LBB87_16:
4528; SSE2-NEXT:    movq %rax, %rcx
4529; SSE2-NEXT:    shrq %rcx
4530; SSE2-NEXT:    andl $1, %eax
4531; SSE2-NEXT:    orq %rcx, %rax
4532; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
4533; SSE2-NEXT:    addss %xmm4, %xmm4
4534; SSE2-NEXT:  .LBB87_18:
4535; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4536; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4537; SSE2-NEXT:    movq 40(%rdi), %rax
4538; SSE2-NEXT:    testq %rax, %rax
4539; SSE2-NEXT:    js .LBB87_19
4540; SSE2-NEXT:  # %bb.20:
4541; SSE2-NEXT:    xorps %xmm2, %xmm2
4542; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
4543; SSE2-NEXT:    jmp .LBB87_21
4544; SSE2-NEXT:  .LBB87_19:
4545; SSE2-NEXT:    movq %rax, %rcx
4546; SSE2-NEXT:    shrq %rcx
4547; SSE2-NEXT:    andl $1, %eax
4548; SSE2-NEXT:    orq %rcx, %rax
4549; SSE2-NEXT:    xorps %xmm2, %xmm2
4550; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
4551; SSE2-NEXT:    addss %xmm2, %xmm2
4552; SSE2-NEXT:  .LBB87_21:
4553; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4554; SSE2-NEXT:    unpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
4555; SSE2-NEXT:    movq 32(%rdi), %rax
4556; SSE2-NEXT:    testq %rax, %rax
4557; SSE2-NEXT:    js .LBB87_22
4558; SSE2-NEXT:  # %bb.23:
4559; SSE2-NEXT:    xorps %xmm1, %xmm1
4560; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
4561; SSE2-NEXT:    jmp .LBB87_24
4562; SSE2-NEXT:  .LBB87_22:
4563; SSE2-NEXT:    movq %rax, %rcx
4564; SSE2-NEXT:    shrq %rcx
4565; SSE2-NEXT:    andl $1, %eax
4566; SSE2-NEXT:    orq %rcx, %rax
4567; SSE2-NEXT:    xorps %xmm1, %xmm1
4568; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
4569; SSE2-NEXT:    addss %xmm1, %xmm1
4570; SSE2-NEXT:  .LBB87_24:
4571; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4572; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
4573; SSE2-NEXT:    retq
4574;
4575; SSE41-LABEL: uitofp_load_8i64_to_8f32:
4576; SSE41:       # %bb.0:
4577; SSE41-NEXT:    movdqa (%rdi), %xmm4
4578; SSE41-NEXT:    movdqa 16(%rdi), %xmm5
4579; SSE41-NEXT:    movdqa 32(%rdi), %xmm6
4580; SSE41-NEXT:    movdqa 48(%rdi), %xmm2
4581; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm7 = [1,1]
4582; SSE41-NEXT:    movdqa %xmm4, %xmm0
4583; SSE41-NEXT:    pand %xmm7, %xmm0
4584; SSE41-NEXT:    movdqa %xmm4, %xmm1
4585; SSE41-NEXT:    psrlq $1, %xmm1
4586; SSE41-NEXT:    por %xmm0, %xmm1
4587; SSE41-NEXT:    movdqa %xmm4, %xmm3
4588; SSE41-NEXT:    movdqa %xmm4, %xmm0
4589; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
4590; SSE41-NEXT:    pextrq $1, %xmm3, %rax
4591; SSE41-NEXT:    xorps %xmm0, %xmm0
4592; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4593; SSE41-NEXT:    movq %xmm3, %rax
4594; SSE41-NEXT:    xorps %xmm3, %xmm3
4595; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
4596; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3]
4597; SSE41-NEXT:    movdqa %xmm5, %xmm0
4598; SSE41-NEXT:    pand %xmm7, %xmm0
4599; SSE41-NEXT:    movdqa %xmm5, %xmm1
4600; SSE41-NEXT:    psrlq $1, %xmm1
4601; SSE41-NEXT:    por %xmm0, %xmm1
4602; SSE41-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
4603; SSE41-NEXT:    movaps %xmm5, %xmm0
4604; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm5
4605; SSE41-NEXT:    movq %xmm5, %rax
4606; SSE41-NEXT:    xorps %xmm0, %xmm0
4607; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4608; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3]
4609; SSE41-NEXT:    pextrq $1, %xmm5, %rax
4610; SSE41-NEXT:    xorps %xmm0, %xmm0
4611; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4612; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0]
4613; SSE41-NEXT:    movaps %xmm3, %xmm1
4614; SSE41-NEXT:    addps %xmm3, %xmm1
4615; SSE41-NEXT:    movaps %xmm4, %xmm0
4616; SSE41-NEXT:    blendvps %xmm0, %xmm1, %xmm3
4617; SSE41-NEXT:    movdqa %xmm6, %xmm0
4618; SSE41-NEXT:    pand %xmm7, %xmm0
4619; SSE41-NEXT:    movdqa %xmm6, %xmm1
4620; SSE41-NEXT:    psrlq $1, %xmm1
4621; SSE41-NEXT:    por %xmm0, %xmm1
4622; SSE41-NEXT:    movdqa %xmm6, %xmm4
4623; SSE41-NEXT:    movdqa %xmm6, %xmm0
4624; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
4625; SSE41-NEXT:    pextrq $1, %xmm4, %rax
4626; SSE41-NEXT:    xorps %xmm0, %xmm0
4627; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4628; SSE41-NEXT:    movq %xmm4, %rax
4629; SSE41-NEXT:    xorps %xmm1, %xmm1
4630; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
4631; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
4632; SSE41-NEXT:    pand %xmm2, %xmm7
4633; SSE41-NEXT:    movdqa %xmm2, %xmm4
4634; SSE41-NEXT:    psrlq $1, %xmm4
4635; SSE41-NEXT:    por %xmm7, %xmm4
4636; SSE41-NEXT:    shufps {{.*#+}} xmm6 = xmm6[1,3],xmm2[1,3]
4637; SSE41-NEXT:    movaps %xmm2, %xmm0
4638; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
4639; SSE41-NEXT:    movq %xmm2, %rax
4640; SSE41-NEXT:    xorps %xmm0, %xmm0
4641; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4642; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
4643; SSE41-NEXT:    pextrq $1, %xmm2, %rax
4644; SSE41-NEXT:    xorps %xmm0, %xmm0
4645; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
4646; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
4647; SSE41-NEXT:    movaps %xmm1, %xmm2
4648; SSE41-NEXT:    addps %xmm1, %xmm2
4649; SSE41-NEXT:    movaps %xmm6, %xmm0
4650; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
4651; SSE41-NEXT:    movaps %xmm3, %xmm0
4652; SSE41-NEXT:    retq
4653;
4654; AVX1-LABEL: uitofp_load_8i64_to_8f32:
4655; AVX1:       # %bb.0:
4656; AVX1-NEXT:    vmovaps (%rdi), %ymm0
4657; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
4658; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
4659; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm3
4660; AVX1-NEXT:    vpsrlq $1, %xmm1, %xmm4
4661; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm5
4662; AVX1-NEXT:    vpsrlq $1, %xmm5, %xmm6
4663; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
4664; AVX1-NEXT:    vorps %ymm3, %ymm4, %ymm3
4665; AVX1-NEXT:    vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
4666; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
4667; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm4
4668; AVX1-NEXT:    vmovq %xmm3, %rax
4669; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm6
4670; AVX1-NEXT:    vinsertps {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[2,3]
4671; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
4672; AVX1-NEXT:    vmovq %xmm3, %rax
4673; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm6
4674; AVX1-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
4675; AVX1-NEXT:    vpextrq $1, %xmm3, %rax
4676; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm3
4677; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
4678; AVX1-NEXT:    vaddps %xmm3, %xmm3, %xmm4
4679; AVX1-NEXT:    vpackssdw %xmm5, %xmm1, %xmm1
4680; AVX1-NEXT:    vblendvps %xmm1, %xmm4, %xmm3, %xmm1
4681; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm2
4682; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm3
4683; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm4
4684; AVX1-NEXT:    vpsrlq $1, %xmm4, %xmm5
4685; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm3, %ymm3
4686; AVX1-NEXT:    vorps %ymm2, %ymm3, %ymm2
4687; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
4688; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
4689; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm3
4690; AVX1-NEXT:    vmovq %xmm2, %rax
4691; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm5
4692; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
4693; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
4694; AVX1-NEXT:    vmovq %xmm2, %rax
4695; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm5
4696; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
4697; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
4698; AVX1-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm2
4699; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
4700; AVX1-NEXT:    vaddps %xmm2, %xmm2, %xmm3
4701; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
4702; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
4703; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4704; AVX1-NEXT:    retq
4705;
4706; AVX2-LABEL: uitofp_load_8i64_to_8f32:
4707; AVX2:       # %bb.0:
4708; AVX2-NEXT:    vmovaps (%rdi), %ymm0
4709; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
4710; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
4711; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm3
4712; AVX2-NEXT:    vpsrlq $1, %ymm1, %ymm4
4713; AVX2-NEXT:    vpor %ymm3, %ymm4, %ymm3
4714; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
4715; AVX2-NEXT:    vpextrq $1, %xmm3, %rax
4716; AVX2-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
4717; AVX2-NEXT:    vmovq %xmm3, %rax
4718; AVX2-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm5
4719; AVX2-NEXT:    vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
4720; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm3
4721; AVX2-NEXT:    vmovq %xmm3, %rax
4722; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm5
4723; AVX2-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
4724; AVX2-NEXT:    vpextrq $1, %xmm3, %rax
4725; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
4726; AVX2-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
4727; AVX2-NEXT:    vaddps %xmm3, %xmm3, %xmm4
4728; AVX2-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
4729; AVX2-NEXT:    vblendvps %xmm1, %xmm4, %xmm3, %xmm1
4730; AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm2
4731; AVX2-NEXT:    vpsrlq $1, %ymm0, %ymm3
4732; AVX2-NEXT:    vpor %ymm2, %ymm3, %ymm2
4733; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
4734; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
4735; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
4736; AVX2-NEXT:    vmovq %xmm2, %rax
4737; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
4738; AVX2-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
4739; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
4740; AVX2-NEXT:    vmovq %xmm2, %rax
4741; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
4742; AVX2-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
4743; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
4744; AVX2-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm2
4745; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
4746; AVX2-NEXT:    vaddps %xmm2, %xmm2, %xmm3
4747; AVX2-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
4748; AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
4749; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4750; AVX2-NEXT:    retq
4751;
4752; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
4753; AVX512F:       # %bb.0:
4754; AVX512F-NEXT:    vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
4755; AVX512F-NEXT:    vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
4756; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4757; AVX512F-NEXT:    vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
4758; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4759; AVX512F-NEXT:    vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
4760; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4761; AVX512F-NEXT:    vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
4762; AVX512F-NEXT:    vcvtusi2ssq (%rdi), %xmm2, %xmm2
4763; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4764; AVX512F-NEXT:    vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
4765; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4766; AVX512F-NEXT:    vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
4767; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4768; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4769; AVX512F-NEXT:    retq
4770;
4771; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
4772; AVX512VL:       # %bb.0:
4773; AVX512VL-NEXT:    vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
4774; AVX512VL-NEXT:    vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
4775; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
4776; AVX512VL-NEXT:    vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
4777; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
4778; AVX512VL-NEXT:    vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
4779; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
4780; AVX512VL-NEXT:    vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
4781; AVX512VL-NEXT:    vcvtusi2ssq (%rdi), %xmm2, %xmm2
4782; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4783; AVX512VL-NEXT:    vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
4784; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4785; AVX512VL-NEXT:    vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
4786; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4787; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4788; AVX512VL-NEXT:    retq
4789;
4790; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
4791; AVX512DQ:       # %bb.0:
4792; AVX512DQ-NEXT:    vcvtuqq2ps (%rdi), %ymm0
4793; AVX512DQ-NEXT:    retq
4794;
4795; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32:
4796; AVX512VLDQ:       # %bb.0:
4797; AVX512VLDQ-NEXT:    vcvtuqq2ps (%rdi), %ymm0
4798; AVX512VLDQ-NEXT:    retq
4799  %ld = load <8 x i64>, ptr%a
4800  %cvt = uitofp <8 x i64> %ld to <8 x float>
4801  ret <8 x float> %cvt
4802}
4803
; uitofp of a loaded <8 x i32> to <8 x float>. Pre-AVX512 targets have no
; unsigned cvt, so the checks expect the magic-constant trick (split each u32
; into hi/lo 16-bit halves, bias with 1258291200/1392508928, subps, addps);
; AVX512 targets lower directly to vcvtudq2ps.
4804define <8 x float> @uitofp_load_8i32_to_8f32(ptr%a) {
4805; SSE2-LABEL: uitofp_load_8i32_to_8f32:
4806; SSE2:       # %bb.0:
4807; SSE2-NEXT:    movdqa (%rdi), %xmm0
4808; SSE2-NEXT:    movdqa 16(%rdi), %xmm1
4809; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
4810; SSE2-NEXT:    movdqa %xmm0, %xmm3
4811; SSE2-NEXT:    pand %xmm2, %xmm3
4812; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
4813; SSE2-NEXT:    por %xmm4, %xmm3
4814; SSE2-NEXT:    psrld $16, %xmm0
4815; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
4816; SSE2-NEXT:    por %xmm5, %xmm0
4817; SSE2-NEXT:    movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4818; SSE2-NEXT:    subps %xmm6, %xmm0
4819; SSE2-NEXT:    addps %xmm3, %xmm0
4820; SSE2-NEXT:    pand %xmm1, %xmm2
4821; SSE2-NEXT:    por %xmm4, %xmm2
4822; SSE2-NEXT:    psrld $16, %xmm1
4823; SSE2-NEXT:    por %xmm5, %xmm1
4824; SSE2-NEXT:    subps %xmm6, %xmm1
4825; SSE2-NEXT:    addps %xmm2, %xmm1
4826; SSE2-NEXT:    retq
4827;
4828; SSE41-LABEL: uitofp_load_8i32_to_8f32:
4829; SSE41:       # %bb.0:
4830; SSE41-NEXT:    movdqa (%rdi), %xmm0
4831; SSE41-NEXT:    movdqa 16(%rdi), %xmm1
4832; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200]
4833; SSE41-NEXT:    movdqa %xmm0, %xmm3
4834; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
4835; SSE41-NEXT:    psrld $16, %xmm0
4836; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928]
4837; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
4838; SSE41-NEXT:    movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4839; SSE41-NEXT:    subps %xmm5, %xmm0
4840; SSE41-NEXT:    addps %xmm3, %xmm0
4841; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
4842; SSE41-NEXT:    psrld $16, %xmm1
4843; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
4844; SSE41-NEXT:    subps %xmm5, %xmm1
4845; SSE41-NEXT:    addps %xmm2, %xmm1
4846; SSE41-NEXT:    retq
4847;
4848; AVX1-LABEL: uitofp_load_8i32_to_8f32:
4849; AVX1:       # %bb.0:
4850; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
4851; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
4852; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm2
4853; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
4854; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
4855; AVX1-NEXT:    vcvtdq2ps %ymm1, %ymm1
4856; AVX1-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
4857; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
4858; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
4859; AVX1-NEXT:    vaddps %ymm0, %ymm1, %ymm0
4860; AVX1-NEXT:    retq
4861;
4862; AVX2-LABEL: uitofp_load_8i32_to_8f32:
4863; AVX2:       # %bb.0:
4864; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
4865; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
4866; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
4867; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
4868; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
4869; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
4870; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
4871; AVX2-NEXT:    vsubps %ymm2, %ymm0, %ymm0
4872; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
4873; AVX2-NEXT:    retq
4874;
4875; AVX512F-LABEL: uitofp_load_8i32_to_8f32:
4876; AVX512F:       # %bb.0:
4877; AVX512F-NEXT:    vmovaps (%rdi), %ymm0
4878; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
4879; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
4880; AVX512F-NEXT:    retq
4881;
4882; AVX512VL-LABEL: uitofp_load_8i32_to_8f32:
4883; AVX512VL:       # %bb.0:
4884; AVX512VL-NEXT:    vcvtudq2ps (%rdi), %ymm0
4885; AVX512VL-NEXT:    retq
4886;
4887; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32:
4888; AVX512DQ:       # %bb.0:
4889; AVX512DQ-NEXT:    vmovaps (%rdi), %ymm0
4890; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
4891; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
4892; AVX512DQ-NEXT:    retq
4893;
4894; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32:
4895; AVX512VLDQ:       # %bb.0:
4896; AVX512VLDQ-NEXT:    vcvtudq2ps (%rdi), %ymm0
4897; AVX512VLDQ-NEXT:    retq
4898  %ld = load <8 x i32>, ptr%a
4899  %cvt = uitofp <8 x i32> %ld to <8 x float>
4900  ret <8 x float> %cvt
4901}
4902
; uitofp of a loaded <8 x i16> to <8 x float>. Expected lowering everywhere is
; zero-extension of the u16 lanes to i32 (punpck with zero / pmovzxwd) followed
; by the signed cvtdq2ps, which is exact because the extended values are
; non-negative and fit in i32.
4903define <8 x float> @uitofp_load_8i16_to_8f32(ptr%a) {
4904; SSE2-LABEL: uitofp_load_8i16_to_8f32:
4905; SSE2:       # %bb.0:
4906; SSE2-NEXT:    movdqa (%rdi), %xmm1
4907; SSE2-NEXT:    pxor %xmm2, %xmm2
4908; SSE2-NEXT:    movdqa %xmm1, %xmm0
4909; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4910; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
4911; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4912; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
4913; SSE2-NEXT:    retq
4914;
4915; SSE41-LABEL: uitofp_load_8i16_to_8f32:
4916; SSE41:       # %bb.0:
4917; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4918; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4919; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
4920; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm1
4921; SSE41-NEXT:    retq
4922;
4923; AVX1-LABEL: uitofp_load_8i16_to_8f32:
4924; AVX1:       # %bb.0:
4925; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4926; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4927; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4928; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
4929; AVX1-NEXT:    retq
4930;
4931; AVX2-LABEL: uitofp_load_8i16_to_8f32:
4932; AVX2:       # %bb.0:
4933; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4934; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
4935; AVX2-NEXT:    retq
4936;
4937; AVX512-LABEL: uitofp_load_8i16_to_8f32:
4938; AVX512:       # %bb.0:
4939; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4940; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
4941; AVX512-NEXT:    retq
4942  %ld = load <8 x i16>, ptr%a
4943  %cvt = uitofp <8 x i16> %ld to <8 x float>
4944  ret <8 x float> %cvt
4945}
4946
; uitofp of a loaded <8 x i8> to <8 x float>. Same pattern as the i16 case:
; zero-extend the u8 lanes to i32 (punpcklbw/punpck*wd with zero, or
; pmovzxbd) and use the signed cvtdq2ps, which is exact for these values.
4947define <8 x float> @uitofp_load_8i8_to_8f32(ptr%a) {
4948; SSE2-LABEL: uitofp_load_8i8_to_8f32:
4949; SSE2:       # %bb.0:
4950; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
4951; SSE2-NEXT:    pxor %xmm2, %xmm2
4952; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4953; SSE2-NEXT:    movdqa %xmm1, %xmm0
4954; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4955; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
4956; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4957; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
4958; SSE2-NEXT:    retq
4959;
4960; SSE41-LABEL: uitofp_load_8i8_to_8f32:
4961; SSE41:       # %bb.0:
4962; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4963; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4964; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
4965; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm1
4966; SSE41-NEXT:    retq
4967;
4968; AVX1-LABEL: uitofp_load_8i8_to_8f32:
4969; AVX1:       # %bb.0:
4970; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4971; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4972; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4973; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
4974; AVX1-NEXT:    retq
4975;
4976; AVX2-LABEL: uitofp_load_8i8_to_8f32:
4977; AVX2:       # %bb.0:
4978; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4979; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
4980; AVX2-NEXT:    retq
4981;
4982; AVX512-LABEL: uitofp_load_8i8_to_8f32:
4983; AVX512:       # %bb.0:
4984; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4985; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
4986; AVX512-NEXT:    retq
4987  %ld = load <8 x i8>, ptr%a
4988  %cvt = uitofp <8 x i8> %ld to <8 x float>
4989  ret <8 x float> %cvt
4990}
4991
4992;
4993; Aggregates
4994;
4995
; Loads a packed (align-1) aggregate, sitofp's its <8 x i16> member to
; <8 x float>, and stores the result through the aggregate's ptr member.
; Checks that the unaligned field loads (e.g. movdqu 8(%rdi)) and the
; sign-extending widen (punpck+psrad / pmovsxwd) are emitted correctly.
4996%Arguments = type <{ <8 x i8>, <8 x i16>, ptr }>
4997define void @aggregate_sitofp_8i16_to_8f32(ptr nocapture readonly %a0) {
4998; SSE2-LABEL: aggregate_sitofp_8i16_to_8f32:
4999; SSE2:       # %bb.0:
5000; SSE2-NEXT:    movq 24(%rdi), %rax
5001; SSE2-NEXT:    movdqu 8(%rdi), %xmm0
5002; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5003; SSE2-NEXT:    psrad $16, %xmm1
5004; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
5005; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
5006; SSE2-NEXT:    psrad $16, %xmm0
5007; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
5008; SSE2-NEXT:    movaps %xmm0, 16(%rax)
5009; SSE2-NEXT:    movaps %xmm1, (%rax)
5010; SSE2-NEXT:    retq
5011;
5012; SSE41-LABEL: aggregate_sitofp_8i16_to_8f32:
5013; SSE41:       # %bb.0:
5014; SSE41-NEXT:    movq 24(%rdi), %rax
5015; SSE41-NEXT:    pmovsxwd 16(%rdi), %xmm0
5016; SSE41-NEXT:    pmovsxwd 8(%rdi), %xmm1
5017; SSE41-NEXT:    cvtdq2ps %xmm1, %xmm1
5018; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
5019; SSE41-NEXT:    movaps %xmm0, 16(%rax)
5020; SSE41-NEXT:    movaps %xmm1, (%rax)
5021; SSE41-NEXT:    retq
5022;
5023; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
5024; AVX1:       # %bb.0:
5025; AVX1-NEXT:    movq 24(%rdi), %rax
5026; AVX1-NEXT:    vpmovsxwd 16(%rdi), %xmm0
5027; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm1
5028; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
5029; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
5030; AVX1-NEXT:    vmovaps %ymm0, (%rax)
5031; AVX1-NEXT:    vzeroupper
5032; AVX1-NEXT:    retq
5033;
5034; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
5035; AVX2:       # %bb.0:
5036; AVX2-NEXT:    movq 24(%rdi), %rax
5037; AVX2-NEXT:    vpmovsxwd 8(%rdi), %ymm0
5038; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
5039; AVX2-NEXT:    vmovaps %ymm0, (%rax)
5040; AVX2-NEXT:    vzeroupper
5041; AVX2-NEXT:    retq
5042;
5043; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32:
5044; AVX512:       # %bb.0:
5045; AVX512-NEXT:    movq 24(%rdi), %rax
5046; AVX512-NEXT:    vpmovsxwd 8(%rdi), %ymm0
5047; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
5048; AVX512-NEXT:    vmovaps %ymm0, (%rax)
5049; AVX512-NEXT:    vzeroupper
5050; AVX512-NEXT:    retq
5051 %1 = load %Arguments, ptr %a0, align 1
5052 %2 = extractvalue %Arguments %1, 1
5053 %3 = extractvalue %Arguments %1, 2
5054 %4 = sitofp <8 x i16> %2 to <8 x float>
5055 store <8 x float> %4, ptr %3, align 32
5056 ret void
5057}
5058
; sitofp of a scalar i32 inserted into element 0 of a <2 x double>:
; should fold to a single cvtsi2sd into the existing vector register.
5059define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
5060; SSE-LABEL: sitofp_i32_to_2f64:
5061; SSE:       # %bb.0:
5062; SSE-NEXT:    cvtsi2sd %edi, %xmm0
5063; SSE-NEXT:    retq
5064;
5065; AVX-LABEL: sitofp_i32_to_2f64:
5066; AVX:       # %bb.0:
5067; AVX-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0
5068; AVX-NEXT:    retq
5069  %cvt = sitofp i32 %a1 to double
5070  %res = insertelement <2 x double> %a0, double %cvt, i32 0
5071  ret <2 x double> %res
5072}
5073
; sitofp of a scalar i32 inserted into element 0 of a <4 x float>:
; should fold to a single cvtsi2ss into the existing vector register.
5074define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
5075; SSE-LABEL: sitofp_i32_to_4f32:
5076; SSE:       # %bb.0:
5077; SSE-NEXT:    cvtsi2ss %edi, %xmm0
5078; SSE-NEXT:    retq
5079;
5080; AVX-LABEL: sitofp_i32_to_4f32:
5081; AVX:       # %bb.0:
5082; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
5083; AVX-NEXT:    retq
5084  %cvt = sitofp i32 %a1 to float
5085  %res = insertelement <4 x float> %a0, float %cvt, i32 0
5086  ret <4 x float> %res
5087}
5088
; sitofp of a scalar i64 inserted into element 0 of a <2 x double>:
; should fold to a single 64-bit cvtsi2sd into the existing vector register.
5089define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
5090; SSE-LABEL: sitofp_i64_to_2f64:
5091; SSE:       # %bb.0:
5092; SSE-NEXT:    cvtsi2sd %rdi, %xmm0
5093; SSE-NEXT:    retq
5094;
5095; AVX-LABEL: sitofp_i64_to_2f64:
5096; AVX:       # %bb.0:
5097; AVX-NEXT:    vcvtsi2sd %rdi, %xmm0, %xmm0
5098; AVX-NEXT:    retq
5099  %cvt = sitofp i64 %a1 to double
5100  %res = insertelement <2 x double> %a0, double %cvt, i32 0
5101  ret <2 x double> %res
5102}
5103
; sitofp of a scalar i64 inserted into element 0 of a <4 x float>:
; should fold to a single 64-bit cvtsi2ss into the existing vector register.
5104define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
5105; SSE-LABEL: sitofp_i64_to_4f32:
5106; SSE:       # %bb.0:
5107; SSE-NEXT:    cvtsi2ss %rdi, %xmm0
5108; SSE-NEXT:    retq
5109;
5110; AVX-LABEL: sitofp_i64_to_4f32:
5111; AVX:       # %bb.0:
5112; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
5113; AVX-NEXT:    retq
5114  %cvt = sitofp i64 %a1 to float
5115  %res = insertelement <4 x float> %a0, float %cvt, i32 0
5116  ret <4 x float> %res
5117}
5118
5119; Extract from int vector and convert to FP.
5120
; extractelement 0 + sitofp to float: should avoid a scalar extract and
; convert in-register with cvtdq2ps (element 0 of the result is the answer).
5121define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
5122; SSE-LABEL: extract0_sitofp_v4i32_f32:
5123; SSE:       # %bb.0:
5124; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
5125; SSE-NEXT:    retq
5126;
5127; AVX-LABEL: extract0_sitofp_v4i32_f32:
5128; AVX:       # %bb.0:
5129; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
5130; AVX-NEXT:    retq
5131  %e = extractelement <4 x i32> %x, i32 0
5132  %r = sitofp i32 %e to float
5133  ret float %r
5134}
5135
; Multi-use variant: the extracted element feeds both a direct sitofp
; (still done in-register via cvtdq2ps) and an add+sitofp that needs the
; scalar in a GPR (movd + cvtsi2ss).
5136define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind {
5137; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
5138; SSE:       # %bb.0:
5139; SSE-NEXT:    movd %xmm0, %eax
5140; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
5141; SSE-NEXT:    incl %eax
5142; SSE-NEXT:    cvtsi2ss %eax, %xmm1
5143; SSE-NEXT:    divss %xmm1, %xmm0
5144; SSE-NEXT:    retq
5145;
5146; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
5147; AVX:       # %bb.0:
5148; AVX-NEXT:    vmovd %xmm0, %eax
5149; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
5150; AVX-NEXT:    incl %eax
5151; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm1
5152; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
5153; AVX-NEXT:    retq
5154  %e = extractelement <4 x i32> %x, i32 0
5155  %f = sitofp i32 %e to float
5156  %e1 = add i32 %e, 1
5157  %f1 = sitofp i32 %e1 to float
5158  %r = fdiv float %f, %f1
5159  ret float %r
5160}
5161
; Multi-use variant where the extracted element is also stored: the
; conversion stays in-register (cvtdq2ps) and the store uses movss of the
; original lane 0, so no GPR round-trip is needed.
5162define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, ptr %p) nounwind {
5163; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
5164; SSE:       # %bb.0:
5165; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
5166; SSE-NEXT:    movss %xmm0, (%rdi)
5167; SSE-NEXT:    movaps %xmm1, %xmm0
5168; SSE-NEXT:    retq
5169;
5170; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
5171; AVX:       # %bb.0:
5172; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm1
5173; AVX-NEXT:    vmovss %xmm0, (%rdi)
5174; AVX-NEXT:    vmovaps %xmm1, %xmm0
5175; AVX-NEXT:    retq
5176  %e = extractelement <4 x i32> %x, i32 0
5177  %r = sitofp i32 %e to float
5178  store i32 %e, ptr %p
5179  ret float %r
5180}
5181
; extractelement 0 + sitofp to double: SSE goes through a GPR
; (movd + cvtsi2sd), while AVX converts in-register with vcvtdq2pd.
5182define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
5183; SSE-LABEL: extract0_sitofp_v4i32_f64:
5184; SSE:       # %bb.0:
5185; SSE-NEXT:    movd %xmm0, %eax
5186; SSE-NEXT:    xorps %xmm0, %xmm0
5187; SSE-NEXT:    cvtsi2sd %eax, %xmm0
5188; SSE-NEXT:    retq
5189;
5190; AVX-LABEL: extract0_sitofp_v4i32_f64:
5191; AVX:       # %bb.0:
5192; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
5193; AVX-NEXT:    retq
5194  %e = extractelement <4 x i32> %x, i32 0
5195  %r = sitofp i32 %e to double
5196  ret double %r
5197}
5198
; extractelement 0 + uitofp to float. SSE/VEX targets zero-extend the u32
; into a 64-bit GPR (movd implicitly zero-extends) and use the signed
; 64-bit cvtsi2ss; AVX512 targets use the native unsigned vcvtudq2ps.
5199define float @extract0_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
5200; SSE-LABEL: extract0_uitofp_v4i32_f32:
5201; SSE:       # %bb.0:
5202; SSE-NEXT:    movd %xmm0, %eax
5203; SSE-NEXT:    xorps %xmm0, %xmm0
5204; SSE-NEXT:    cvtsi2ss %rax, %xmm0
5205; SSE-NEXT:    retq
5206;
5207; VEX-LABEL: extract0_uitofp_v4i32_f32:
5208; VEX:       # %bb.0:
5209; VEX-NEXT:    vmovd %xmm0, %eax
5210; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
5211; VEX-NEXT:    retq
5212;
5213; AVX512F-LABEL: extract0_uitofp_v4i32_f32:
5214; AVX512F:       # %bb.0:
5215; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5216; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
5217; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
5218; AVX512F-NEXT:    vzeroupper
5219; AVX512F-NEXT:    retq
5220;
5221; AVX512VL-LABEL: extract0_uitofp_v4i32_f32:
5222; AVX512VL:       # %bb.0:
5223; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
5224; AVX512VL-NEXT:    retq
5225;
5226; AVX512DQ-LABEL: extract0_uitofp_v4i32_f32:
5227; AVX512DQ:       # %bb.0:
5228; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5229; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
5230; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
5231; AVX512DQ-NEXT:    vzeroupper
5232; AVX512DQ-NEXT:    retq
5233;
5234; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f32:
5235; AVX512VLDQ:       # %bb.0:
5236; AVX512VLDQ-NEXT:    vcvtudq2ps %xmm0, %xmm0
5237; AVX512VLDQ-NEXT:    retq
5238  %e = extractelement <4 x i32> %x, i32 0
5239  %r = uitofp i32 %e to float
5240  ret float %r
5241}
5242
; extractelement 0 + uitofp to double. Same strategy as the f32 variant:
; SSE/VEX zero-extend into a 64-bit GPR and use signed cvtsi2sd; AVX512
; targets use the native unsigned vcvtudq2pd.
5243define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
5244; SSE-LABEL: extract0_uitofp_v4i32_f64:
5245; SSE:       # %bb.0:
5246; SSE-NEXT:    movd %xmm0, %eax
5247; SSE-NEXT:    xorps %xmm0, %xmm0
5248; SSE-NEXT:    cvtsi2sd %rax, %xmm0
5249; SSE-NEXT:    retq
5250;
5251; VEX-LABEL: extract0_uitofp_v4i32_f64:
5252; VEX:       # %bb.0:
5253; VEX-NEXT:    vmovd %xmm0, %eax
5254; VEX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
5255; VEX-NEXT:    retq
5256;
5257; AVX512F-LABEL: extract0_uitofp_v4i32_f64:
5258; AVX512F:       # %bb.0:
5259; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
5260; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
5261; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
5262; AVX512F-NEXT:    vzeroupper
5263; AVX512F-NEXT:    retq
5264;
5265; AVX512VL-LABEL: extract0_uitofp_v4i32_f64:
5266; AVX512VL:       # %bb.0:
5267; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
5268; AVX512VL-NEXT:    retq
5269;
5270; AVX512DQ-LABEL: extract0_uitofp_v4i32_f64:
5271; AVX512DQ:       # %bb.0:
5272; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
5273; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
5274; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
5275; AVX512DQ-NEXT:    vzeroupper
5276; AVX512DQ-NEXT:    retq
5277;
5278; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f64:
5279; AVX512VLDQ:       # %bb.0:
5280; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0
5281; AVX512VLDQ-NEXT:    retq
5282  %e = extractelement <4 x i32> %x, i32 0
5283  %r = uitofp i32 %e to double
5284  ret double %r
5285}
5286
5287; Extract an element from a non-zero index of an int vector and convert it to FP.
5288
5289define float @extract3_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
; Signed i32 lane 3 -> float.  Both paths stay in vector registers: splat
; lane 3 across the vector with a shuffle, then use the packed cvtdq2ps;
; the scalar float return value is read from lane 0 of xmm0.
; SSE-LABEL: extract3_sitofp_v4i32_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to float
  ret float %r
}
5305
5306define double @extract3_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
; Signed i32 lane 3 -> double.  SSE2 shuffles the lane down and bounces
; through a GPR; SSE4.1 uses extractps instead of shuffle+movd.  The xorps
; before cvtsi2sd breaks the false dependency on the destination register.
; AVX keeps the value in-register: splat lane 3, then vcvtdq2pd.
; SSE2-LABEL: extract3_sitofp_v4i32_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: extract3_sitofp_v4i32_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    extractps $3, %xmm0, %eax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2sd %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to double
  ret double %r
}
5331
5332define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; Unsigned i32 lane 3 -> float.  Pre-AVX512: extract to a 32-bit GPR
; (zero-extending into rax) and convert with the *signed* 64-bit cvtsi2ss,
; which is exact for any u32.  AVX512 variants instead splat lane 3 and use
; the unsigned packed vcvtudq2ps -- on zmm (with kill markers and
; vzeroupper) unless the VL subset provides the xmm form.
; SSE2-LABEL: extract3_uitofp_v4i32_f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    extractps $3, %xmm0, %eax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f32:
; VEX:       # %bb.0:
; VEX-NEXT:    vextractps $3, %xmm0, %eax
; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT:    retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to float
  ret float %r
}
5385
5386define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; Unsigned i32 lane 3 -> double.  Same strategy as the f32 variant above:
; pre-AVX512 extracts to a 32-bit GPR (zero-extending into rax) and uses
; the signed 64-bit cvtsi2sd, exact for any u32; AVX512 splats lane 3 and
; converts with the unsigned packed vcvtudq2pd (widened to a ymm source /
; zmm destination when AVX512VL is unavailable).
; SSE2-LABEL: extract3_uitofp_v4i32_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    extractps $3, %xmm0, %eax
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
; SSE41-NEXT:    retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f64:
; VEX:       # %bb.0:
; VEX-NEXT:    vextractps $3, %xmm0, %eax
; VEX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT:    retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT:    retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to double
  ret double %r
}
5439
5440define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; Reduced from PR43609: uitofp <2 x i64> -> <2 x double> of %y and %y+2,
; each followed by a fast fadd of 0.5, stored to consecutive <2 x double>
; slots at %x.  Without AVX512DQ each u64 is converted by splitting it into
; 32-bit halves: mask/blend keeps the low half and ORs in the exponent-bias
; bit pattern 4841369599423283200 (0x4330000000000000), psrlq $32 isolates
; the high half and ORs in 4985484787499139072 (0x4530000000000000), then
; subpd removes the combined bias constant (1.9342813118337666E+25) and
; addpd recombines the halves.  AVX512DQ targets convert directly with
; vcvtuqq2pd (on zmm without VL, hence the kill marker and vzeroupper).
; The fadd instructions carry the `fast` flag (and #0 = unsafe-fp-math).
; SSE2-LABEL: PR43609:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,2]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm0
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm0
; SSE2-NEXT:    addpd %xmm3, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm2, %xmm1
; SSE2-NEXT:    movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE2-NEXT:    addpd %xmm2, %xmm0
; SSE2-NEXT:    addpd %xmm2, %xmm1
; SSE2-NEXT:    movupd %xmm0, (%rdi)
; SSE2-NEXT:    movupd %xmm1, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR43609:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [2,2]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE41-NEXT:    por %xmm4, %xmm3
; SSE41-NEXT:    psrlq $32, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE41-NEXT:    subpd %xmm6, %xmm0
; SSE41-NEXT:    addpd %xmm3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    psrlq $32, %xmm1
; SSE41-NEXT:    por %xmm5, %xmm1
; SSE41-NEXT:    subpd %xmm6, %xmm1
; SSE41-NEXT:    addpd %xmm2, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE41-NEXT:    addpd %xmm2, %xmm0
; SSE41-NEXT:    addpd %xmm2, %xmm1
; SSE41-NEXT:    movupd %xmm0, (%rdi)
; SSE41-NEXT:    movupd %xmm1, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: PR43609:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX1-NEXT:    # xmm4 = mem[0,0]
; AVX1-NEXT:    vpor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX1-NEXT:    # xmm5 = mem[0,0]
; AVX1-NEXT:    vpor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX1-NEXT:    # xmm6 = mem[0,0]
; AVX1-NEXT:    vsubpd %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX1-NEXT:    vpor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vsubpd %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vaddpd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovupd %xmm0, (%rdi)
; AVX1-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR43609:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX2-NEXT:    vpor %xmm4, %xmm3, %xmm3
; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT:    vpor %xmm5, %xmm0, %xmm0
; AVX2-NEXT:    vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT:    # xmm6 = mem[0,0]
; AVX2-NEXT:    vsubpd %xmm6, %xmm0, %xmm0
; AVX2-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm5, %xmm1, %xmm1
; AVX2-NEXT:    vsubpd %xmm6, %xmm1, %xmm1
; AVX2-NEXT:    vaddpd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX2-NEXT:    # xmm2 = mem[0,0]
; AVX2-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vmovupd %xmm0, (%rdi)
; AVX2-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: PR43609:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512F-NEXT:    vpor %xmm4, %xmm3, %xmm3
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT:    vpor %xmm5, %xmm0, %xmm0
; AVX512F-NEXT:    vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT:    # xmm6 = mem[0,0]
; AVX512F-NEXT:    vsubpd %xmm6, %xmm0, %xmm0
; AVX512F-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512F-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpor %xmm5, %xmm1, %xmm1
; AVX512F-NEXT:    vsubpd %xmm6, %xmm1, %xmm1
; AVX512F-NEXT:    vaddpd %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512F-NEXT:    # xmm2 = mem[0,0]
; AVX512F-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX512F-NEXT:    vmovupd %xmm0, (%rdi)
; AVX512F-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: PR43609:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512VL-NEXT:    vpor %xmm4, %xmm3, %xmm3
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512VL-NEXT:    vpor %xmm5, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512VL-NEXT:    # xmm6 = mem[0,0]
; AVX512VL-NEXT:    vsubpd %xmm6, %xmm0, %xmm0
; AVX512VL-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT:    vpor %xmm5, %xmm1, %xmm1
; AVX512VL-NEXT:    vsubpd %xmm6, %xmm1, %xmm1
; AVX512VL-NEXT:    vaddpd %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VL-NEXT:    # xmm2 = mem[0,0]
; AVX512VL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi)
; AVX512VL-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512DQ-LABEL: PR43609:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm1, %zmm1
; AVX512DQ-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512DQ-NEXT:    # xmm2 = mem[0,0]
; AVX512DQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX512DQ-NEXT:    vmovupd %xmm0, (%rdi)
; AVX512DQ-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VLDQ-LABEL: PR43609:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
; AVX512VLDQ-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vcvtuqq2pd %xmm1, %xmm1
; AVX512VLDQ-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VLDQ-NEXT:    # xmm2 = mem[0,0]
; AVX512VLDQ-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
; AVX512VLDQ-NEXT:    vmovupd %xmm0, (%rdi)
; AVX512VLDQ-NEXT:    vmovupd %xmm1, 16(%rdi)
; AVX512VLDQ-NEXT:    retq
  %step.add.epil = add <2 x i64> %y, <i64 2, i64 2>
  %t20 = uitofp <2 x i64> %y to <2 x double>
  %t21 = uitofp <2 x i64> %step.add.epil to <2 x double>
  %t22 = fadd fast <2 x double> %t20, <double 5.0e-01, double 5.0e-01>
  %t23 = fadd fast <2 x double> %t21, <double 5.0e-01, double 5.0e-01>
  store <2 x double> %t22, ptr %x, align 8
  %t26 = getelementptr inbounds double, ptr %x, i64 2
  store <2 x double> %t23, ptr %t26, align 8
  ret void
}
5648
5649attributes #0 = { "unsafe-fp-math"="true" }
5650
5651