; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-64,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-64
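; Strict (constrained-semantics) sitofp/uitofp tests for 128-bit vectors,
; covering i1/i8/i16/i32/i64 elements converted to f32 and f64.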

declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)

define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i32_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

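; Without a native unsigned conversion, u32 -> f32 goes through f64: each u32
; is zero-extended into the low bits of a double whose high bits encode 2^52
; (the 4.503599627370496E+15 constant below), the bias is subtracted to get the
; exact integer as f64, and the result is rounded to f32. AVX-512 targets use
; vcvtudq2ps directly.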
define <2 x float> @uitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: uitofp_v2i32_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    subpd %xmm1, %xmm0
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i32_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT:    orpd %xmm1, %xmm0
; SSE41-NEXT:    subpd %xmm1, %xmm0
; SSE41-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: uitofp_v2i32_v2f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQVL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

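; There is no packed i64 -> fp conversion before AVX-512DQ, so each element is
; extracted and converted individually: scalar cvtsi2ss on 64-bit targets, and
; x87 fildll (which loads a signed i64 directly) via the stack on 32-bit
; targets.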
define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: sitofp_v2i64_v2f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstps (%esp)
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: sitofp_v2i64_v2f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    cvtsi2ss %rax, %xmm1
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-64-NEXT:    movaps %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: sitofp_v2i64_v2f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $24, %esp
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstps (%esp)
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: sitofp_v2i64_v2f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    cvtsi2ss %rax, %xmm1
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2ss %rax, %xmm0
; SSE41-64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE41-64-NEXT:    movaps %xmm1, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: sitofp_v2i64_v2f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $24, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: sitofp_v2i64_v2f32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-64-NEXT:    vmovq %xmm0, %rax
; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT:    vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT:    vmovq %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

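; Unsigned i64 -> f32 must compensate for fildll/cvtsi2ss being signed. The
; 64-bit lowering halves inputs with the sign bit set, folding the low bit back
; in (round-to-odd), converts, then doubles the result; the 32-bit lowering
; adds 2^64 after fildll, loaded from a two-entry constant table indexed by the
; original sign bit.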
define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE-32-NEXT:    movd %xmm1, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstps (%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movd %xmm0, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movdqa %xmm0, %xmm1
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2ss %rdx, %xmm0
; SSE-64-NEXT:    jns .LBB3_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    addss %xmm0, %xmm0
; SSE-64-NEXT:  .LBB3_2:
; SSE-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT:    movq %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    cvtsi2ss %rdx, %xmm1
; SSE-64-NEXT:    jns .LBB3_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    addss %xmm1, %xmm1
; SSE-64-NEXT:  .LBB3_4:
; SSE-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $24, %esp
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE41-32-NEXT:    movd %xmm1, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstps (%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-32-NEXT:    movd %xmm0, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movdqa %xmm0, %xmm1
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2ss %rdx, %xmm0
; SSE41-64-NEXT:    jns .LBB3_2
; SSE41-64-NEXT:  # %bb.1:
; SSE41-64-NEXT:    addss %xmm0, %xmm0
; SSE41-64-NEXT:  .LBB3_2:
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm1, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm1, %xmm1
; SSE41-64-NEXT:    cvtsi2ss %rdx, %xmm1
; SSE41-64-NEXT:    jns .LBB3_4
; SSE41-64-NEXT:  # %bb.3:
; SSE41-64-NEXT:    addss %xmm1, %xmm1
; SSE41-64-NEXT:  .LBB3_4:
; SSE41-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $24, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstps (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-64-NEXT:    vpsrlq $1, %xmm0, %xmm2
; AVX1-64-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-64-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-64-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX1-64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-64-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX1-64-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT:    vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT:    vmovq %xmm0, %rax
; AVX512DQ-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

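; Boolean vectors are materialized in-register first: sitofp sign-extends each
; lane with a shift-left/arithmetic-shift-right by 31, while uitofp simply
; masks each lane with 1.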
define <4 x float> @sitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-LABEL: sitofp_v4i1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i1_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-32-LABEL: uitofp_v4i1_v4f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v4i1_v4f32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v4i1_v4f32:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v4i1_v4f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX1-32-LABEL: uitofp_v4i1_v4f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-64-NEXT:    retq
;
; AVX512F-LABEL: uitofp_v4i1_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    retq
 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

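; i8/i16 vectors are widened to i32 lanes before converting: sitofp interleaves
; and arithmetic-shifts to sign-extend (pmovsx on AVX), uitofp interleaves with
; zero (pmovzx on AVX).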
define <4 x float> @sitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: sitofp_v4i8_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i8_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $24, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i8_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: uitofp_v4i8_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i8_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i8_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: sitofp_v4i16_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i16_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT:    psrad $16, %xmm0
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i16_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: uitofp_v4i16_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i16_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i16_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; SSE-LABEL: sitofp_v4i32_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i32_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i32_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

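; Packed u32 -> f32 without AVX-512 splits each lane into 16-bit halves,
; converts both halves exactly using magic-number constants, cancels the bias
; with a single subps, and adds the two partial results.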
736define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 {
737; SSE-32-LABEL: uitofp_v4i32_v4f32:
738; SSE-32:       # %bb.0:
739; SSE-32-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
740; SSE-32-NEXT:    pand %xmm0, %xmm1
741; SSE-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
742; SSE-32-NEXT:    psrld $16, %xmm0
743; SSE-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
744; SSE-32-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
745; SSE-32-NEXT:    addps %xmm1, %xmm0
746; SSE-32-NEXT:    retl
747;
748; SSE-64-LABEL: uitofp_v4i32_v4f32:
749; SSE-64:       # %bb.0:
750; SSE-64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
751; SSE-64-NEXT:    pand %xmm0, %xmm1
752; SSE-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
753; SSE-64-NEXT:    psrld $16, %xmm0
754; SSE-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
755; SSE-64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
756; SSE-64-NEXT:    addps %xmm1, %xmm0
757; SSE-64-NEXT:    retq
758;
759; SSE41-32-LABEL: uitofp_v4i32_v4f32:
760; SSE41-32:       # %bb.0:
761; SSE41-32-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
762; SSE41-32-NEXT:    pand %xmm0, %xmm1
763; SSE41-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
764; SSE41-32-NEXT:    psrld $16, %xmm0
765; SSE41-32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
766; SSE41-32-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
767; SSE41-32-NEXT:    addps %xmm1, %xmm0
768; SSE41-32-NEXT:    retl
769;
770; SSE41-64-LABEL: uitofp_v4i32_v4f32:
771; SSE41-64:       # %bb.0:
772; SSE41-64-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
773; SSE41-64-NEXT:    pand %xmm0, %xmm1
774; SSE41-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
775; SSE41-64-NEXT:    psrld $16, %xmm0
776; SSE41-64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
777; SSE41-64-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
778; SSE41-64-NEXT:    addps %xmm1, %xmm0
779; SSE41-64-NEXT:    retq
780;
781; AVX1-32-LABEL: uitofp_v4i32_v4f32:
782; AVX1-32:       # %bb.0:
783; AVX1-32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
784; AVX1-32-NEXT:    vpsrld $16, %xmm0, %xmm0
785; AVX1-32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
786; AVX1-32-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
787; AVX1-32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
788; AVX1-32-NEXT:    retl
789;
790; AVX1-64-LABEL: uitofp_v4i32_v4f32:
791; AVX1-64:       # %bb.0:
792; AVX1-64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
793; AVX1-64-NEXT:    vpsrld $16, %xmm0, %xmm0
794; AVX1-64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
795; AVX1-64-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
796; AVX1-64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
797; AVX1-64-NEXT:    retq
798;
799; AVX512F-LABEL: uitofp_v4i32_v4f32:
800; AVX512F:       # %bb.0:
801; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
802; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
803; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
804; AVX512F-NEXT:    vzeroupper
805; AVX512F-NEXT:    ret{{[l|q]}}
806;
807; AVX512VL-LABEL: uitofp_v4i32_v4f32:
808; AVX512VL:       # %bb.0:
809; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
810; AVX512VL-NEXT:    ret{{[l|q]}}
811;
812; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
813; AVX512DQ:       # %bb.0:
814; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
815; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
816; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
817; AVX512DQ-NEXT:    vzeroupper
818; AVX512DQ-NEXT:    ret{{[l|q]}}
819;
820; AVX512DQVL-LABEL: uitofp_v4i32_v4f32:
821; AVX512DQVL:       # %bb.0:
822; AVX512DQVL-NEXT:    vcvtudq2ps %xmm0, %xmm0
823; AVX512DQVL-NEXT:    ret{{[l|q]}}
824 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
825                                                              metadata !"round.dynamic",
826                                                              metadata !"fpexcept.strict") #0
827  ret <4 x float> %result
828}
829
830define <2 x double> @sitofp_v2i1_v2f64(<2 x i1> %x) #0 {
831; SSE-LABEL: sitofp_v2i1_v2f64:
832; SSE:       # %bb.0:
833; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
834; SSE-NEXT:    pslld $31, %xmm0
835; SSE-NEXT:    psrad $31, %xmm0
836; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
837; SSE-NEXT:    ret{{[l|q]}}
838;
839; SSE41-LABEL: sitofp_v2i1_v2f64:
840; SSE41:       # %bb.0:
841; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
842; SSE41-NEXT:    pslld $31, %xmm0
843; SSE41-NEXT:    psrad $31, %xmm0
844; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
845; SSE41-NEXT:    ret{{[l|q]}}
846;
847; AVX-LABEL: sitofp_v2i1_v2f64:
848; AVX:       # %bb.0:
849; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
850; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
851; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
852; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
853; AVX-NEXT:    ret{{[l|q]}}
854 %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1> %x,
855                                                              metadata !"round.dynamic",
856                                                              metadata !"fpexcept.strict") #0
857  ret <2 x double> %result
858}
859
860define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
861; SSE-32-LABEL: uitofp_v2i1_v2f64:
862; SSE-32:       # %bb.0:
863; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
864; SSE-32-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
865; SSE-32-NEXT:    cvtdq2pd %xmm0, %xmm0
866; SSE-32-NEXT:    retl
867;
868; SSE-64-LABEL: uitofp_v2i1_v2f64:
869; SSE-64:       # %bb.0:
870; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
871; SSE-64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
872; SSE-64-NEXT:    cvtdq2pd %xmm0, %xmm0
873; SSE-64-NEXT:    retq
874;
875; SSE41-32-LABEL: uitofp_v2i1_v2f64:
876; SSE41-32:       # %bb.0:
877; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
878; SSE41-32-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
879; SSE41-32-NEXT:    cvtdq2pd %xmm0, %xmm0
880; SSE41-32-NEXT:    retl
881;
882; SSE41-64-LABEL: uitofp_v2i1_v2f64:
883; SSE41-64:       # %bb.0:
884; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
885; SSE41-64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
886; SSE41-64-NEXT:    cvtdq2pd %xmm0, %xmm0
887; SSE41-64-NEXT:    retq
888;
889; AVX1-32-LABEL: uitofp_v2i1_v2f64:
890; AVX1-32:       # %bb.0:
891; AVX1-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
892; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
893; AVX1-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
894; AVX1-32-NEXT:    retl
895;
896; AVX1-64-LABEL: uitofp_v2i1_v2f64:
897; AVX1-64:       # %bb.0:
898; AVX1-64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
899; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
900; AVX1-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
901; AVX1-64-NEXT:    retq
902;
903; AVX512F-LABEL: uitofp_v2i1_v2f64:
904; AVX512F:       # %bb.0:
905; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
906; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
907; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
908; AVX512F-NEXT:    vcvtdq2pd %xmm0, %xmm0
909; AVX512F-NEXT:    ret{{[l|q]}}
910;
911; AVX512VL-32-LABEL: uitofp_v2i1_v2f64:
912; AVX512VL-32:       # %bb.0:
913; AVX512VL-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
914; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
915; AVX512VL-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
916; AVX512VL-32-NEXT:    retl
917;
918; AVX512VL-64-LABEL: uitofp_v2i1_v2f64:
919; AVX512VL-64:       # %bb.0:
920; AVX512VL-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
921; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
922; AVX512VL-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
923; AVX512VL-64-NEXT:    retq
924;
925; AVX512DQ-LABEL: uitofp_v2i1_v2f64:
926; AVX512DQ:       # %bb.0:
927; AVX512DQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
928; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
929; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
930; AVX512DQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
931; AVX512DQ-NEXT:    ret{{[l|q]}}
932;
933; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
934; AVX512DQVL-32:       # %bb.0:
935; AVX512DQVL-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
936; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
937; AVX512DQVL-32-NEXT:    vcvtdq2pd %xmm0, %xmm0
938; AVX512DQVL-32-NEXT:    retl
939;
940; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
941; AVX512DQVL-64:       # %bb.0:
942; AVX512DQVL-64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
943; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
944; AVX512DQVL-64-NEXT:    vcvtdq2pd %xmm0, %xmm0
945; AVX512DQVL-64-NEXT:    retq
946 %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x,
947                                                              metadata !"round.dynamic",
948                                                              metadata !"fpexcept.strict") #0
949  ret <2 x double> %result
950}
951
952define <2 x double> @sitofp_v2i8_v2f64(<2 x i8> %x) #0 {
953; SSE-LABEL: sitofp_v2i8_v2f64:
954; SSE:       # %bb.0:
955; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
956; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
957; SSE-NEXT:    psrad $24, %xmm0
958; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
959; SSE-NEXT:    ret{{[l|q]}}
960;
961; SSE41-LABEL: sitofp_v2i8_v2f64:
962; SSE41:       # %bb.0:
963; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
964; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
965; SSE41-NEXT:    psrad $24, %xmm0
966; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
967; SSE41-NEXT:    ret{{[l|q]}}
968;
969; AVX-LABEL: sitofp_v2i8_v2f64:
970; AVX:       # %bb.0:
971; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
972; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
973; AVX-NEXT:    ret{{[l|q]}}
974 %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8> %x,
975                                                              metadata !"round.dynamic",
976                                                              metadata !"fpexcept.strict") #0
977  ret <2 x double> %result
978}
979
980define <2 x double> @uitofp_v2i8_v2f64(<2 x i8> %x) #0 {
981; SSE-LABEL: uitofp_v2i8_v2f64:
982; SSE:       # %bb.0:
983; SSE-NEXT:    pxor %xmm1, %xmm1
984; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
985; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
986; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
987; SSE-NEXT:    ret{{[l|q]}}
988;
989; SSE41-LABEL: uitofp_v2i8_v2f64:
990; SSE41:       # %bb.0:
991; SSE41-NEXT:    pxor %xmm1, %xmm1
992; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
993; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
994; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
995; SSE41-NEXT:    ret{{[l|q]}}
996;
997; AVX-LABEL: uitofp_v2i8_v2f64:
998; AVX:       # %bb.0:
999; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1000; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
1001; AVX-NEXT:    ret{{[l|q]}}
1002 %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8> %x,
1003                                                              metadata !"round.dynamic",
1004                                                              metadata !"fpexcept.strict") #0
1005  ret <2 x double> %result
1006}
1007
1008define <2 x double> @sitofp_v2i16_v2f64(<2 x i16> %x) #0 {
1009; SSE-LABEL: sitofp_v2i16_v2f64:
1010; SSE:       # %bb.0:
1011; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1012; SSE-NEXT:    psrad $16, %xmm0
1013; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
1014; SSE-NEXT:    ret{{[l|q]}}
1015;
1016; SSE41-LABEL: sitofp_v2i16_v2f64:
1017; SSE41:       # %bb.0:
1018; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1019; SSE41-NEXT:    psrad $16, %xmm0
1020; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
1021; SSE41-NEXT:    ret{{[l|q]}}
1022;
1023; AVX-LABEL: sitofp_v2i16_v2f64:
1024; AVX:       # %bb.0:
1025; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
1026; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
1027; AVX-NEXT:    ret{{[l|q]}}
1028 %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16> %x,
1029                                                              metadata !"round.dynamic",
1030                                                              metadata !"fpexcept.strict") #0
1031  ret <2 x double> %result
1032}
1033
1034define <2 x double> @uitofp_v2i16_v2f64(<2 x i16> %x) #0 {
1035; SSE-LABEL: uitofp_v2i16_v2f64:
1036; SSE:       # %bb.0:
1037; SSE-NEXT:    pxor %xmm1, %xmm1
1038; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1039; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
1040; SSE-NEXT:    ret{{[l|q]}}
1041;
1042; SSE41-LABEL: uitofp_v2i16_v2f64:
1043; SSE41:       # %bb.0:
1044; SSE41-NEXT:    pxor %xmm1, %xmm1
1045; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1046; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
1047; SSE41-NEXT:    ret{{[l|q]}}
1048;
1049; AVX-LABEL: uitofp_v2i16_v2f64:
1050; AVX:       # %bb.0:
1051; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1052; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
1053; AVX-NEXT:    ret{{[l|q]}}
1054 %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16> %x,
1055                                                              metadata !"round.dynamic",
1056                                                              metadata !"fpexcept.strict") #0
1057  ret <2 x double> %result
1058}
1059
1060define <2 x double> @sitofp_v2i32_v2f64(<2 x i32> %x) #0 {
1061; SSE-LABEL: sitofp_v2i32_v2f64:
1062; SSE:       # %bb.0:
1063; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
1064; SSE-NEXT:    ret{{[l|q]}}
1065;
1066; SSE41-LABEL: sitofp_v2i32_v2f64:
1067; SSE41:       # %bb.0:
1068; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
1069; SSE41-NEXT:    ret{{[l|q]}}
1070;
1071; AVX-LABEL: sitofp_v2i32_v2f64:
1072; AVX:       # %bb.0:
1073; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
1074; AVX-NEXT:    ret{{[l|q]}}
1075 %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
1076                                                              metadata !"round.dynamic",
1077                                                              metadata !"fpexcept.strict") #0
1078  ret <2 x double> %result
1079}
1080
1081define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
1082; SSE-LABEL: uitofp_v2i32_v2f64:
1083; SSE:       # %bb.0:
1084; SSE-NEXT:    xorpd %xmm1, %xmm1
1085; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1086; SSE-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
1087; SSE-NEXT:    orpd %xmm1, %xmm0
1088; SSE-NEXT:    subpd %xmm1, %xmm0
1089; SSE-NEXT:    ret{{[l|q]}}
1090;
1091; SSE41-LABEL: uitofp_v2i32_v2f64:
1092; SSE41:       # %bb.0:
1093; SSE41-NEXT:    xorpd %xmm1, %xmm1
1094; SSE41-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1095; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
1096; SSE41-NEXT:    orpd %xmm1, %xmm0
1097; SSE41-NEXT:    subpd %xmm1, %xmm0
1098; SSE41-NEXT:    ret{{[l|q]}}
1099;
1100; AVX1-LABEL: uitofp_v2i32_v2f64:
1101; AVX1:       # %bb.0:
1102; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1103; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
1104; AVX1-NEXT:    # xmm1 = mem[0,0]
1105; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1106; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
1107; AVX1-NEXT:    ret{{[l|q]}}
1108;
1109; AVX512F-LABEL: uitofp_v2i32_v2f64:
1110; AVX512F:       # %bb.0:
1111; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1112; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
1113; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1114; AVX512F-NEXT:    vzeroupper
1115; AVX512F-NEXT:    ret{{[l|q]}}
1116;
1117; AVX512VL-LABEL: uitofp_v2i32_v2f64:
1118; AVX512VL:       # %bb.0:
1119; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
1120; AVX512VL-NEXT:    ret{{[l|q]}}
1121;
1122; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
1123; AVX512DQ:       # %bb.0:
1124; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1125; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
1126; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1127; AVX512DQ-NEXT:    vzeroupper
1128; AVX512DQ-NEXT:    ret{{[l|q]}}
1129;
1130; AVX512DQVL-LABEL: uitofp_v2i32_v2f64:
1131; AVX512DQVL:       # %bb.0:
1132; AVX512DQVL-NEXT:    vcvtudq2pd %xmm0, %xmm0
1133; AVX512DQVL-NEXT:    ret{{[l|q]}}
1134 %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
1135                                                              metadata !"round.dynamic",
1136                                                              metadata !"fpexcept.strict") #0
1137  ret <2 x double> %result
1138}
1139
1140define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
1141; SSE-32-LABEL: sitofp_v2i64_v2f64:
1142; SSE-32:       # %bb.0:
1143; SSE-32-NEXT:    pushl %ebp
1144; SSE-32-NEXT:    .cfi_def_cfa_offset 8
1145; SSE-32-NEXT:    .cfi_offset %ebp, -8
1146; SSE-32-NEXT:    movl %esp, %ebp
1147; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
1148; SSE-32-NEXT:    andl $-8, %esp
1149; SSE-32-NEXT:    subl $32, %esp
1150; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
1151; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1152; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
1153; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
1154; SSE-32-NEXT:    fstpl {{[0-9]+}}(%esp)
1155; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
1156; SSE-32-NEXT:    fstpl (%esp)
1157; SSE-32-NEXT:    wait
1158; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1159; SSE-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1160; SSE-32-NEXT:    movl %ebp, %esp
1161; SSE-32-NEXT:    popl %ebp
1162; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
1163; SSE-32-NEXT:    retl
1164;
1165; SSE-64-LABEL: sitofp_v2i64_v2f64:
1166; SSE-64:       # %bb.0:
1167; SSE-64-NEXT:    movq %xmm0, %rax
1168; SSE-64-NEXT:    cvtsi2sd %rax, %xmm1
1169; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1170; SSE-64-NEXT:    movq %xmm0, %rax
1171; SSE-64-NEXT:    xorps %xmm0, %xmm0
1172; SSE-64-NEXT:    cvtsi2sd %rax, %xmm0
1173; SSE-64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1174; SSE-64-NEXT:    movapd %xmm1, %xmm0
1175; SSE-64-NEXT:    retq
1176;
1177; SSE41-32-LABEL: sitofp_v2i64_v2f64:
1178; SSE41-32:       # %bb.0:
1179; SSE41-32-NEXT:    pushl %ebp
1180; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
1181; SSE41-32-NEXT:    .cfi_offset %ebp, -8
1182; SSE41-32-NEXT:    movl %esp, %ebp
1183; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
1184; SSE41-32-NEXT:    andl $-8, %esp
1185; SSE41-32-NEXT:    subl $32, %esp
1186; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
1187; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1188; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
1189; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
1190; SSE41-32-NEXT:    fstpl {{[0-9]+}}(%esp)
1191; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
1192; SSE41-32-NEXT:    fstpl (%esp)
1193; SSE41-32-NEXT:    wait
1194; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1195; SSE41-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1196; SSE41-32-NEXT:    movl %ebp, %esp
1197; SSE41-32-NEXT:    popl %ebp
1198; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
1199; SSE41-32-NEXT:    retl
1200;
1201; SSE41-64-LABEL: sitofp_v2i64_v2f64:
1202; SSE41-64:       # %bb.0:
1203; SSE41-64-NEXT:    movq %xmm0, %rax
1204; SSE41-64-NEXT:    cvtsi2sd %rax, %xmm1
1205; SSE41-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1206; SSE41-64-NEXT:    movq %xmm0, %rax
1207; SSE41-64-NEXT:    xorps %xmm0, %xmm0
1208; SSE41-64-NEXT:    cvtsi2sd %rax, %xmm0
1209; SSE41-64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1210; SSE41-64-NEXT:    movapd %xmm1, %xmm0
1211; SSE41-64-NEXT:    retq
1212;
1213; AVX-32-LABEL: sitofp_v2i64_v2f64:
1214; AVX-32:       # %bb.0:
1215; AVX-32-NEXT:    pushl %ebp
1216; AVX-32-NEXT:    .cfi_def_cfa_offset 8
1217; AVX-32-NEXT:    .cfi_offset %ebp, -8
1218; AVX-32-NEXT:    movl %esp, %ebp
1219; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
1220; AVX-32-NEXT:    andl $-8, %esp
1221; AVX-32-NEXT:    subl $32, %esp
1222; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
1223; AVX-32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
1224; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
1225; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1226; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
1227; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1228; AVX-32-NEXT:    fstpl (%esp)
1229; AVX-32-NEXT:    wait
1230; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1231; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1232; AVX-32-NEXT:    movl %ebp, %esp
1233; AVX-32-NEXT:    popl %ebp
1234; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1235; AVX-32-NEXT:    retl
1236;
1237; AVX-64-LABEL: sitofp_v2i64_v2f64:
1238; AVX-64:       # %bb.0:
1239; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
1240; AVX-64-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
1241; AVX-64-NEXT:    vmovq %xmm0, %rax
1242; AVX-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
1243; AVX-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1244; AVX-64-NEXT:    retq
1245;
1246; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
1247; AVX512DQ:       # %bb.0:
1248; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
1249; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
1250; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1251; AVX512DQ-NEXT:    vzeroupper
1252; AVX512DQ-NEXT:    ret{{[l|q]}}
1253;
1254; AVX512DQVL-LABEL: sitofp_v2i64_v2f64:
1255; AVX512DQVL:       # %bb.0:
1256; AVX512DQVL-NEXT:    vcvtqq2pd %xmm0, %xmm0
1257; AVX512DQVL-NEXT:    ret{{[l|q]}}
1258 %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
1259                                                              metadata !"round.dynamic",
1260                                                              metadata !"fpexcept.strict") #0
1261  ret <2 x double> %result
1262}
1263
define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $32, %esp
; SSE-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movd %xmm1, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-32-NEXT:    movd %xmm0, %eax
; SSE-32-NEXT:    shrl $31, %eax
; SSE-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT:    fstpl (%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movdqa %xmm0, %xmm1
; SSE-64-NEXT:    movq %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE-64-NEXT:    jns .LBB21_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    addsd %xmm0, %xmm0
; SSE-64-NEXT:  .LBB21_2:
; SSE-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT:    movq %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %rcx
; SSE-64-NEXT:    shrq %rcx
; SSE-64-NEXT:    movl %eax, %edx
; SSE-64-NEXT:    andl $1, %edx
; SSE-64-NEXT:    orq %rcx, %rdx
; SSE-64-NEXT:    testq %rax, %rax
; SSE-64-NEXT:    cmovnsq %rax, %rdx
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    cvtsi2sd %rdx, %xmm1
; SSE-64-NEXT:    jns .LBB21_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    addsd %xmm1, %xmm1
; SSE-64-NEXT:  .LBB21_4:
; SSE-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f64:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    pushl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_offset 8
; SSE41-32-NEXT:    .cfi_offset %ebp, -8
; SSE41-32-NEXT:    movl %esp, %ebp
; SSE41-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-32-NEXT:    andl $-8, %esp
; SSE41-32-NEXT:    subl $32, %esp
; SSE41-32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-32-NEXT:    movd %xmm1, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-32-NEXT:    movd %xmm0, %eax
; SSE41-32-NEXT:    shrl $31, %eax
; SSE41-32-NEXT:    fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT:    fstpl (%esp)
; SSE41-32-NEXT:    wait
; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT:    movl %ebp, %esp
; SSE41-32-NEXT:    popl %ebp
; SSE41-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f64:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movdqa %xmm0, %xmm1
; SSE41-64-NEXT:    movq %xmm0, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm0, %xmm0
; SSE41-64-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE41-64-NEXT:    jns .LBB21_2
; SSE41-64-NEXT:  # %bb.1:
; SSE41-64-NEXT:    addsd %xmm0, %xmm0
; SSE41-64-NEXT:  .LBB21_2:
; SSE41-64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT:    movq %xmm1, %rax
; SSE41-64-NEXT:    movq %rax, %rcx
; SSE41-64-NEXT:    shrq %rcx
; SSE41-64-NEXT:    movl %eax, %edx
; SSE41-64-NEXT:    andl $1, %edx
; SSE41-64-NEXT:    orq %rcx, %rdx
; SSE41-64-NEXT:    testq %rax, %rax
; SSE41-64-NEXT:    cmovnsq %rax, %rdx
; SSE41-64-NEXT:    xorps %xmm1, %xmm1
; SSE41-64-NEXT:    cvtsi2sd %rdx, %xmm1
; SSE41-64-NEXT:    jns .LBB21_4
; SSE41-64-NEXT:  # %bb.3:
; SSE41-64-NEXT:    addsd %xmm1, %xmm1
; SSE41-64-NEXT:  .LBB21_4:
; SSE41-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $32, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    movq %rax, %rcx
; AVX1-64-NEXT:    shrq %rcx
; AVX1-64-NEXT:    movl %eax, %edx
; AVX1-64-NEXT:    andl $1, %edx
; AVX1-64-NEXT:    orq %rcx, %rdx
; AVX1-64-NEXT:    testq %rax, %rax
; AVX1-64-NEXT:    cmovnsq %rax, %rdx
; AVX1-64-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-64-NEXT:    jns .LBB21_2
; AVX1-64-NEXT:  # %bb.1:
; AVX1-64-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT:  .LBB21_2:
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    movq %rax, %rcx
; AVX1-64-NEXT:    shrq %rcx
; AVX1-64-NEXT:    movl %eax, %edx
; AVX1-64-NEXT:    andl $1, %edx
; AVX1-64-NEXT:    orq %rcx, %rdx
; AVX1-64-NEXT:    testq %rax, %rax
; AVX1-64-NEXT:    cmovnsq %rax, %rdx
; AVX1-64-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-64-NEXT:    jns .LBB21_4
; AVX1-64-NEXT:  # %bb.3:
; AVX1-64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; AVX1-64-NEXT:  .LBB21_4:
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
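; On AVX512F the unsigned conversion above stays scalar but becomes direct:
; vcvtusi2sd handles each extracted element without the halving sequence.
; AVX512DQ adds packed vcvtuqq2pd, executed on a zmm register (hence the
; vzeroupper) unless AVX512VL permits the xmm form.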

attributes #0 = { strictfp }