xref: /llvm-project/llvm/test/CodeGen/X86/vec_fp_to_int.ll (revision be6c752e157638849f1f59f7e2b7ecbe11a022fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
9;
10; 32-bit smoke tests: these runs only verify that llc completes; their output is not checked.
11; RUN: llc < %s -mtriple=i686-unknown-unknown
12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
14
15;
16; Double to Signed Integer
17;
18
19define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20; SSE-LABEL: fptosi_2f64_to_2i64:
21; SSE:       # %bb.0:
22; SSE-NEXT:    cvttsd2si %xmm0, %rax
23; SSE-NEXT:    movq %rax, %xmm1
24; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
25; SSE-NEXT:    cvttsd2si %xmm0, %rax
26; SSE-NEXT:    movq %rax, %xmm0
27; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28; SSE-NEXT:    movdqa %xmm1, %xmm0
29; SSE-NEXT:    retq
30;
31; VEX-LABEL: fptosi_2f64_to_2i64:
32; VEX:       # %bb.0:
33; VEX-NEXT:    vcvttsd2si %xmm0, %rax
34; VEX-NEXT:    vmovq %rax, %xmm1
35; VEX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
36; VEX-NEXT:    vcvttsd2si %xmm0, %rax
37; VEX-NEXT:    vmovq %rax, %xmm0
38; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
39; VEX-NEXT:    retq
40;
41; AVX512F-LABEL: fptosi_2f64_to_2i64:
42; AVX512F:       # %bb.0:
43; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
44; AVX512F-NEXT:    vmovq %rax, %xmm1
45; AVX512F-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
46; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
47; AVX512F-NEXT:    vmovq %rax, %xmm0
48; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
49; AVX512F-NEXT:    retq
50;
51; AVX512VL-LABEL: fptosi_2f64_to_2i64:
52; AVX512VL:       # %bb.0:
53; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
54; AVX512VL-NEXT:    vmovq %rax, %xmm1
55; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
56; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
57; AVX512VL-NEXT:    vmovq %rax, %xmm0
58; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
59; AVX512VL-NEXT:    retq
60;
61; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
62; AVX512DQ:       # %bb.0:
63; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
64; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
65; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
66; AVX512DQ-NEXT:    vzeroupper
67; AVX512DQ-NEXT:    retq
68;
69; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70; AVX512VLDQ:       # %bb.0:
71; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
72; AVX512VLDQ-NEXT:    retq
73  %cvt = fptosi <2 x double> %a to <2 x i64>
74  ret <2 x i64> %cvt
75}
76
77define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78; SSE-LABEL: fptosi_2f64_to_4i32:
79; SSE:       # %bb.0:
80; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
81; SSE-NEXT:    retq
82;
83; AVX-LABEL: fptosi_2f64_to_4i32:
84; AVX:       # %bb.0:
85; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
86; AVX-NEXT:    retq
87  %cvt = fptosi <2 x double> %a to <2 x i32>
88  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
89  ret <4 x i32> %ext
90}
91
92define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93; SSE-LABEL: fptosi_2f64_to_2i32:
94; SSE:       # %bb.0:
95; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
96; SSE-NEXT:    retq
97;
98; AVX-LABEL: fptosi_2f64_to_2i32:
99; AVX:       # %bb.0:
100; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
101; AVX-NEXT:    retq
102  %cvt = fptosi <2 x double> %a to <2 x i32>
103  ret <2 x i32> %cvt
104}
105
106define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107; SSE-LABEL: fptosi_4f64_to_2i32:
108; SSE:       # %bb.0:
109; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
110; SSE-NEXT:    retq
111;
112; AVX-LABEL: fptosi_4f64_to_2i32:
113; AVX:       # %bb.0:
114; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
115; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
116; AVX-NEXT:    vzeroupper
117; AVX-NEXT:    retq
118  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119  %cvt = fptosi <4 x double> %ext to <4 x i32>
120  ret <4 x i32> %cvt
121}
122
123define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124; SSE-LABEL: fptosi_4f64_to_4i64:
125; SSE:       # %bb.0:
126; SSE-NEXT:    cvttsd2si %xmm0, %rax
127; SSE-NEXT:    movq %rax, %xmm2
128; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
129; SSE-NEXT:    cvttsd2si %xmm0, %rax
130; SSE-NEXT:    movq %rax, %xmm0
131; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132; SSE-NEXT:    cvttsd2si %xmm1, %rax
133; SSE-NEXT:    movq %rax, %xmm3
134; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
135; SSE-NEXT:    cvttsd2si %xmm1, %rax
136; SSE-NEXT:    movq %rax, %xmm0
137; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138; SSE-NEXT:    movdqa %xmm2, %xmm0
139; SSE-NEXT:    movdqa %xmm3, %xmm1
140; SSE-NEXT:    retq
141;
142; AVX1-LABEL: fptosi_4f64_to_4i64:
143; AVX1:       # %bb.0:
144; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
145; AVX1-NEXT:    vcvttsd2si %xmm1, %rax
146; AVX1-NEXT:    vmovq %rax, %xmm2
147; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
148; AVX1-NEXT:    vcvttsd2si %xmm1, %rax
149; AVX1-NEXT:    vmovq %rax, %xmm1
150; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
152; AVX1-NEXT:    vmovq %rax, %xmm2
153; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
154; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
155; AVX1-NEXT:    vmovq %rax, %xmm0
156; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
158; AVX1-NEXT:    retq
159;
160; AVX2-LABEL: fptosi_4f64_to_4i64:
161; AVX2:       # %bb.0:
162; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
163; AVX2-NEXT:    vcvttsd2si %xmm1, %rax
164; AVX2-NEXT:    vmovq %rax, %xmm2
165; AVX2-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
166; AVX2-NEXT:    vcvttsd2si %xmm1, %rax
167; AVX2-NEXT:    vmovq %rax, %xmm1
168; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169; AVX2-NEXT:    vcvttsd2si %xmm0, %rax
170; AVX2-NEXT:    vmovq %rax, %xmm2
171; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
172; AVX2-NEXT:    vcvttsd2si %xmm0, %rax
173; AVX2-NEXT:    vmovq %rax, %xmm0
174; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
176; AVX2-NEXT:    retq
177;
178; AVX512F-LABEL: fptosi_4f64_to_4i64:
179; AVX512F:       # %bb.0:
180; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
181; AVX512F-NEXT:    vcvttsd2si %xmm1, %rax
182; AVX512F-NEXT:    vmovq %rax, %xmm2
183; AVX512F-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
184; AVX512F-NEXT:    vcvttsd2si %xmm1, %rax
185; AVX512F-NEXT:    vmovq %rax, %xmm1
186; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
188; AVX512F-NEXT:    vmovq %rax, %xmm2
189; AVX512F-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
190; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
191; AVX512F-NEXT:    vmovq %rax, %xmm0
192; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
194; AVX512F-NEXT:    retq
195;
196; AVX512VL-LABEL: fptosi_4f64_to_4i64:
197; AVX512VL:       # %bb.0:
198; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm1
199; AVX512VL-NEXT:    vcvttsd2si %xmm1, %rax
200; AVX512VL-NEXT:    vmovq %rax, %xmm2
201; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
202; AVX512VL-NEXT:    vcvttsd2si %xmm1, %rax
203; AVX512VL-NEXT:    vmovq %rax, %xmm1
204; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
206; AVX512VL-NEXT:    vmovq %rax, %xmm2
207; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
208; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
209; AVX512VL-NEXT:    vmovq %rax, %xmm0
210; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
212; AVX512VL-NEXT:    retq
213;
214; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
215; AVX512DQ:       # %bb.0:
216; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
217; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
218; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
219; AVX512DQ-NEXT:    retq
220;
221; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222; AVX512VLDQ:       # %bb.0:
223; AVX512VLDQ-NEXT:    vcvttpd2qq %ymm0, %ymm0
224; AVX512VLDQ-NEXT:    retq
225  %cvt = fptosi <4 x double> %a to <4 x i64>
226  ret <4 x i64> %cvt
227}
228
229define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230; SSE-LABEL: fptosi_4f64_to_4i32:
231; SSE:       # %bb.0:
232; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
233; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
234; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
235; SSE-NEXT:    retq
236;
237; AVX-LABEL: fptosi_4f64_to_4i32:
238; AVX:       # %bb.0:
239; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
240; AVX-NEXT:    vzeroupper
241; AVX-NEXT:    retq
242  %cvt = fptosi <4 x double> %a to <4 x i32>
243  ret <4 x i32> %cvt
244}
245
246;
247; Double to Unsigned Integer
248;
249
250define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251; SSE-LABEL: fptoui_2f64_to_2i64:
252; SSE:       # %bb.0:
253; SSE-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
254; SSE-NEXT:    movapd %xmm0, %xmm1
255; SSE-NEXT:    subsd %xmm2, %xmm1
256; SSE-NEXT:    cvttsd2si %xmm1, %rax
257; SSE-NEXT:    cvttsd2si %xmm0, %rcx
258; SSE-NEXT:    movq %rcx, %rdx
259; SSE-NEXT:    sarq $63, %rdx
260; SSE-NEXT:    andq %rax, %rdx
261; SSE-NEXT:    orq %rcx, %rdx
262; SSE-NEXT:    movq %rdx, %xmm1
263; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264; SSE-NEXT:    cvttsd2si %xmm0, %rax
265; SSE-NEXT:    subsd %xmm2, %xmm0
266; SSE-NEXT:    cvttsd2si %xmm0, %rcx
267; SSE-NEXT:    movq %rax, %rdx
268; SSE-NEXT:    sarq $63, %rdx
269; SSE-NEXT:    andq %rcx, %rdx
270; SSE-NEXT:    orq %rax, %rdx
271; SSE-NEXT:    movq %rdx, %xmm0
272; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273; SSE-NEXT:    movdqa %xmm1, %xmm0
274; SSE-NEXT:    retq
275;
276; VEX-LABEL: fptoui_2f64_to_2i64:
277; VEX:       # %bb.0:
278; VEX-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
279; VEX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
280; VEX-NEXT:    vcvttsd2si %xmm2, %rax
281; VEX-NEXT:    vcvttsd2si %xmm0, %rcx
282; VEX-NEXT:    movq %rcx, %rdx
283; VEX-NEXT:    sarq $63, %rdx
284; VEX-NEXT:    andq %rax, %rdx
285; VEX-NEXT:    orq %rcx, %rdx
286; VEX-NEXT:    vmovq %rdx, %xmm2
287; VEX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
288; VEX-NEXT:    vsubsd %xmm1, %xmm0, %xmm1
289; VEX-NEXT:    vcvttsd2si %xmm1, %rax
290; VEX-NEXT:    vcvttsd2si %xmm0, %rcx
291; VEX-NEXT:    movq %rcx, %rdx
292; VEX-NEXT:    sarq $63, %rdx
293; VEX-NEXT:    andq %rax, %rdx
294; VEX-NEXT:    orq %rcx, %rdx
295; VEX-NEXT:    vmovq %rdx, %xmm0
296; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
297; VEX-NEXT:    retq
298;
299; AVX512F-LABEL: fptoui_2f64_to_2i64:
300; AVX512F:       # %bb.0:
301; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
302; AVX512F-NEXT:    vmovq %rax, %xmm1
303; AVX512F-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
304; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
305; AVX512F-NEXT:    vmovq %rax, %xmm0
306; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
307; AVX512F-NEXT:    retq
308;
309; AVX512VL-LABEL: fptoui_2f64_to_2i64:
310; AVX512VL:       # %bb.0:
311; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
312; AVX512VL-NEXT:    vmovq %rax, %xmm1
313; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
314; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
315; AVX512VL-NEXT:    vmovq %rax, %xmm0
316; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
317; AVX512VL-NEXT:    retq
318;
319; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
320; AVX512DQ:       # %bb.0:
321; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
322; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
323; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
324; AVX512DQ-NEXT:    vzeroupper
325; AVX512DQ-NEXT:    retq
326;
327; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
328; AVX512VLDQ:       # %bb.0:
329; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
330; AVX512VLDQ-NEXT:    retq
331  %cvt = fptoui <2 x double> %a to <2 x i64>
332  ret <2 x i64> %cvt
333}
334
335define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
336; SSE-LABEL: fptoui_2f64_to_4i32:
337; SSE:       # %bb.0:
338; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
339; SSE-NEXT:    movapd %xmm1, %xmm2
340; SSE-NEXT:    psrad $31, %xmm2
341; SSE-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
342; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
343; SSE-NEXT:    andpd %xmm2, %xmm0
344; SSE-NEXT:    orpd %xmm1, %xmm0
345; SSE-NEXT:    retq
346;
347; VEX-LABEL: fptoui_2f64_to_4i32:
348; VEX:       # %bb.0:
349; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm1
350; VEX-NEXT:    vpsrad $31, %xmm1, %xmm2
351; VEX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
352; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
353; VEX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
354; VEX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
355; VEX-NEXT:    retq
356;
357; AVX512F-LABEL: fptoui_2f64_to_4i32:
358; AVX512F:       # %bb.0:
359; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
360; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
361; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
362; AVX512F-NEXT:    vzeroupper
363; AVX512F-NEXT:    retq
364;
365; AVX512VL-LABEL: fptoui_2f64_to_4i32:
366; AVX512VL:       # %bb.0:
367; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
368; AVX512VL-NEXT:    retq
369;
370; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
371; AVX512DQ:       # %bb.0:
372; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
373; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
374; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
375; AVX512DQ-NEXT:    vzeroupper
376; AVX512DQ-NEXT:    retq
377;
378; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
379; AVX512VLDQ:       # %bb.0:
380; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
381; AVX512VLDQ-NEXT:    retq
382  %cvt = fptoui <2 x double> %a to <2 x i32>
383  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
384  ret <4 x i32> %ext
385}
386
387define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
388; SSE-LABEL: fptoui_2f64_to_2i32:
389; SSE:       # %bb.0:
390; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
391; SSE-NEXT:    movapd %xmm1, %xmm2
392; SSE-NEXT:    psrad $31, %xmm2
393; SSE-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
394; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
395; SSE-NEXT:    andpd %xmm2, %xmm0
396; SSE-NEXT:    orpd %xmm1, %xmm0
397; SSE-NEXT:    retq
398;
399; VEX-LABEL: fptoui_2f64_to_2i32:
400; VEX:       # %bb.0:
401; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm1
402; VEX-NEXT:    vpsrad $31, %xmm1, %xmm2
403; VEX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
404; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
405; VEX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
406; VEX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
407; VEX-NEXT:    retq
408;
409; AVX512F-LABEL: fptoui_2f64_to_2i32:
410; AVX512F:       # %bb.0:
411; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
412; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
413; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
414; AVX512F-NEXT:    vzeroupper
415; AVX512F-NEXT:    retq
416;
417; AVX512VL-LABEL: fptoui_2f64_to_2i32:
418; AVX512VL:       # %bb.0:
419; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
420; AVX512VL-NEXT:    retq
421;
422; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
423; AVX512DQ:       # %bb.0:
424; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
425; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
426; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
427; AVX512DQ-NEXT:    vzeroupper
428; AVX512DQ-NEXT:    retq
429;
430; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
431; AVX512VLDQ:       # %bb.0:
432; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
433; AVX512VLDQ-NEXT:    retq
434  %cvt = fptoui <2 x double> %a to <2 x i32>
435  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
436  ret <4 x i32> %ext
437}
438
439define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
440; SSE-LABEL: fptoui_4f64_to_2i32:
441; SSE:       # %bb.0:
442; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
443; SSE-NEXT:    movapd %xmm1, %xmm2
444; SSE-NEXT:    psrad $31, %xmm2
445; SSE-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
446; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
447; SSE-NEXT:    andpd %xmm2, %xmm0
448; SSE-NEXT:    orpd %xmm1, %xmm0
449; SSE-NEXT:    retq
450;
451; AVX1-LABEL: fptoui_4f64_to_2i32:
452; AVX1:       # %bb.0:
453; AVX1-NEXT:    vmovapd %xmm0, %xmm0
454; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm1
455; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
456; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
457; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
458; AVX1-NEXT:    vandpd %xmm2, %xmm0, %xmm0
459; AVX1-NEXT:    vorpd %xmm0, %xmm1, %xmm0
460; AVX1-NEXT:    vzeroupper
461; AVX1-NEXT:    retq
462;
463; AVX2-LABEL: fptoui_4f64_to_2i32:
464; AVX2:       # %bb.0:
465; AVX2-NEXT:    vmovapd %xmm0, %xmm0
466; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
467; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
468; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
469; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
470; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
471; AVX2-NEXT:    vandpd %xmm2, %xmm1, %xmm1
472; AVX2-NEXT:    vorpd %xmm1, %xmm0, %xmm0
473; AVX2-NEXT:    vzeroupper
474; AVX2-NEXT:    retq
475;
476; AVX512F-LABEL: fptoui_4f64_to_2i32:
477; AVX512F:       # %bb.0:
478; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
479; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
480; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
481; AVX512F-NEXT:    vzeroupper
482; AVX512F-NEXT:    retq
483;
484; AVX512VL-LABEL: fptoui_4f64_to_2i32:
485; AVX512VL:       # %bb.0:
486; AVX512VL-NEXT:    vmovaps %xmm0, %xmm0
487; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
488; AVX512VL-NEXT:    vzeroupper
489; AVX512VL-NEXT:    retq
490;
491; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
492; AVX512DQ:       # %bb.0:
493; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
494; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
495; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
496; AVX512DQ-NEXT:    vzeroupper
497; AVX512DQ-NEXT:    retq
498;
499; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
500; AVX512VLDQ:       # %bb.0:
501; AVX512VLDQ-NEXT:    vmovaps %xmm0, %xmm0
502; AVX512VLDQ-NEXT:    vcvttpd2udq %ymm0, %xmm0
503; AVX512VLDQ-NEXT:    vzeroupper
504; AVX512VLDQ-NEXT:    retq
505  %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
506  %cvt = fptoui <4 x double> %ext to <4 x i32>
507  ret <4 x i32> %cvt
508}
509
510define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
511; SSE-LABEL: fptoui_4f64_to_4i64:
512; SSE:       # %bb.0:
513; SSE-NEXT:    movapd %xmm0, %xmm2
514; SSE-NEXT:    movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
515; SSE-NEXT:    subsd %xmm3, %xmm0
516; SSE-NEXT:    cvttsd2si %xmm0, %rax
517; SSE-NEXT:    cvttsd2si %xmm2, %rcx
518; SSE-NEXT:    movq %rcx, %rdx
519; SSE-NEXT:    sarq $63, %rdx
520; SSE-NEXT:    andq %rax, %rdx
521; SSE-NEXT:    orq %rcx, %rdx
522; SSE-NEXT:    movq %rdx, %xmm0
523; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
524; SSE-NEXT:    cvttsd2si %xmm2, %rax
525; SSE-NEXT:    subsd %xmm3, %xmm2
526; SSE-NEXT:    cvttsd2si %xmm2, %rcx
527; SSE-NEXT:    movq %rax, %rdx
528; SSE-NEXT:    sarq $63, %rdx
529; SSE-NEXT:    andq %rcx, %rdx
530; SSE-NEXT:    orq %rax, %rdx
531; SSE-NEXT:    movq %rdx, %xmm2
532; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
533; SSE-NEXT:    movapd %xmm1, %xmm2
534; SSE-NEXT:    subsd %xmm3, %xmm2
535; SSE-NEXT:    cvttsd2si %xmm2, %rax
536; SSE-NEXT:    cvttsd2si %xmm1, %rcx
537; SSE-NEXT:    movq %rcx, %rdx
538; SSE-NEXT:    sarq $63, %rdx
539; SSE-NEXT:    andq %rax, %rdx
540; SSE-NEXT:    orq %rcx, %rdx
541; SSE-NEXT:    movq %rdx, %xmm2
542; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
543; SSE-NEXT:    cvttsd2si %xmm1, %rax
544; SSE-NEXT:    subsd %xmm3, %xmm1
545; SSE-NEXT:    cvttsd2si %xmm1, %rcx
546; SSE-NEXT:    movq %rax, %rdx
547; SSE-NEXT:    sarq $63, %rdx
548; SSE-NEXT:    andq %rcx, %rdx
549; SSE-NEXT:    orq %rax, %rdx
550; SSE-NEXT:    movq %rdx, %xmm1
551; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
552; SSE-NEXT:    movdqa %xmm2, %xmm1
553; SSE-NEXT:    retq
554;
555; AVX1-LABEL: fptoui_4f64_to_4i64:
556; AVX1:       # %bb.0:
557; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
558; AVX1-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
559; AVX1-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
560; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
561; AVX1-NEXT:    vcvttsd2si %xmm2, %rcx
562; AVX1-NEXT:    movq %rcx, %rdx
563; AVX1-NEXT:    sarq $63, %rdx
564; AVX1-NEXT:    andq %rax, %rdx
565; AVX1-NEXT:    orq %rcx, %rdx
566; AVX1-NEXT:    vmovq %rdx, %xmm3
567; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
568; AVX1-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
569; AVX1-NEXT:    vcvttsd2si %xmm4, %rax
570; AVX1-NEXT:    vcvttsd2si %xmm2, %rcx
571; AVX1-NEXT:    movq %rcx, %rdx
572; AVX1-NEXT:    sarq $63, %rdx
573; AVX1-NEXT:    andq %rax, %rdx
574; AVX1-NEXT:    orq %rcx, %rdx
575; AVX1-NEXT:    vmovq %rdx, %xmm2
576; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
577; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
578; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
579; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
580; AVX1-NEXT:    movq %rcx, %rdx
581; AVX1-NEXT:    sarq $63, %rdx
582; AVX1-NEXT:    andq %rax, %rdx
583; AVX1-NEXT:    orq %rcx, %rdx
584; AVX1-NEXT:    vmovq %rdx, %xmm3
585; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
586; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm1
587; AVX1-NEXT:    vcvttsd2si %xmm1, %rax
588; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
589; AVX1-NEXT:    movq %rcx, %rdx
590; AVX1-NEXT:    sarq $63, %rdx
591; AVX1-NEXT:    andq %rax, %rdx
592; AVX1-NEXT:    orq %rcx, %rdx
593; AVX1-NEXT:    vmovq %rdx, %xmm0
594; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
595; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
596; AVX1-NEXT:    retq
597;
598; AVX2-LABEL: fptoui_4f64_to_4i64:
599; AVX2:       # %bb.0:
600; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm2
601; AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
602; AVX2-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
603; AVX2-NEXT:    vcvttsd2si %xmm3, %rax
604; AVX2-NEXT:    vcvttsd2si %xmm2, %rcx
605; AVX2-NEXT:    movq %rcx, %rdx
606; AVX2-NEXT:    sarq $63, %rdx
607; AVX2-NEXT:    andq %rax, %rdx
608; AVX2-NEXT:    orq %rcx, %rdx
609; AVX2-NEXT:    vmovq %rdx, %xmm3
610; AVX2-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
611; AVX2-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
612; AVX2-NEXT:    vcvttsd2si %xmm4, %rax
613; AVX2-NEXT:    vcvttsd2si %xmm2, %rcx
614; AVX2-NEXT:    movq %rcx, %rdx
615; AVX2-NEXT:    sarq $63, %rdx
616; AVX2-NEXT:    andq %rax, %rdx
617; AVX2-NEXT:    orq %rcx, %rdx
618; AVX2-NEXT:    vmovq %rdx, %xmm2
619; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
620; AVX2-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
621; AVX2-NEXT:    vcvttsd2si %xmm3, %rax
622; AVX2-NEXT:    vcvttsd2si %xmm0, %rcx
623; AVX2-NEXT:    movq %rcx, %rdx
624; AVX2-NEXT:    sarq $63, %rdx
625; AVX2-NEXT:    andq %rax, %rdx
626; AVX2-NEXT:    orq %rcx, %rdx
627; AVX2-NEXT:    vmovq %rdx, %xmm3
628; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
629; AVX2-NEXT:    vsubsd %xmm1, %xmm0, %xmm1
630; AVX2-NEXT:    vcvttsd2si %xmm1, %rax
631; AVX2-NEXT:    vcvttsd2si %xmm0, %rcx
632; AVX2-NEXT:    movq %rcx, %rdx
633; AVX2-NEXT:    sarq $63, %rdx
634; AVX2-NEXT:    andq %rax, %rdx
635; AVX2-NEXT:    orq %rcx, %rdx
636; AVX2-NEXT:    vmovq %rdx, %xmm0
637; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
639; AVX2-NEXT:    retq
640;
641; AVX512F-LABEL: fptoui_4f64_to_4i64:
642; AVX512F:       # %bb.0:
643; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
644; AVX512F-NEXT:    vcvttsd2usi %xmm1, %rax
645; AVX512F-NEXT:    vmovq %rax, %xmm2
646; AVX512F-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
647; AVX512F-NEXT:    vcvttsd2usi %xmm1, %rax
648; AVX512F-NEXT:    vmovq %rax, %xmm1
649; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
650; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
651; AVX512F-NEXT:    vmovq %rax, %xmm2
652; AVX512F-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
653; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
654; AVX512F-NEXT:    vmovq %rax, %xmm0
655; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
656; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
657; AVX512F-NEXT:    retq
658;
659; AVX512VL-LABEL: fptoui_4f64_to_4i64:
660; AVX512VL:       # %bb.0:
661; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm1
662; AVX512VL-NEXT:    vcvttsd2usi %xmm1, %rax
663; AVX512VL-NEXT:    vmovq %rax, %xmm2
664; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
665; AVX512VL-NEXT:    vcvttsd2usi %xmm1, %rax
666; AVX512VL-NEXT:    vmovq %rax, %xmm1
667; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
668; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
669; AVX512VL-NEXT:    vmovq %rax, %xmm2
670; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
671; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
672; AVX512VL-NEXT:    vmovq %rax, %xmm0
673; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
674; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
675; AVX512VL-NEXT:    retq
676;
677; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
678; AVX512DQ:       # %bb.0:
679; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
680; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
681; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
682; AVX512DQ-NEXT:    retq
683;
684; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
685; AVX512VLDQ:       # %bb.0:
686; AVX512VLDQ-NEXT:    vcvttpd2uqq %ymm0, %ymm0
687; AVX512VLDQ-NEXT:    retq
688  %cvt = fptoui <4 x double> %a to <4 x i64>
689  ret <4 x i64> %cvt
690}
691
692define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
693; SSE-LABEL: fptoui_4f64_to_4i32:
694; SSE:       # %bb.0:
695; SSE-NEXT:    movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9]
696; SSE-NEXT:    cvttpd2dq %xmm1, %xmm3
697; SSE-NEXT:    subpd %xmm2, %xmm1
698; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
699; SSE-NEXT:    movapd %xmm3, %xmm4
700; SSE-NEXT:    psrad $31, %xmm4
701; SSE-NEXT:    pand %xmm1, %xmm4
702; SSE-NEXT:    por %xmm3, %xmm4
703; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
704; SSE-NEXT:    subpd %xmm2, %xmm0
705; SSE-NEXT:    cvttpd2dq %xmm0, %xmm2
706; SSE-NEXT:    movapd %xmm1, %xmm0
707; SSE-NEXT:    psrad $31, %xmm0
708; SSE-NEXT:    pand %xmm2, %xmm0
709; SSE-NEXT:    por %xmm1, %xmm0
710; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
711; SSE-NEXT:    retq
712;
713; AVX1-LABEL: fptoui_4f64_to_4i32:
714; AVX1:       # %bb.0:
715; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm1
716; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
717; AVX1-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
718; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
719; AVX1-NEXT:    vandpd %xmm2, %xmm0, %xmm0
720; AVX1-NEXT:    vorpd %xmm0, %xmm1, %xmm0
721; AVX1-NEXT:    vzeroupper
722; AVX1-NEXT:    retq
723;
724; AVX2-LABEL: fptoui_4f64_to_4i32:
725; AVX2:       # %bb.0:
726; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
727; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
728; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
729; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
730; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
731; AVX2-NEXT:    vandpd %xmm2, %xmm1, %xmm1
732; AVX2-NEXT:    vorpd %xmm1, %xmm0, %xmm0
733; AVX2-NEXT:    vzeroupper
734; AVX2-NEXT:    retq
735;
736; AVX512F-LABEL: fptoui_4f64_to_4i32:
737; AVX512F:       # %bb.0:
738; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
739; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
740; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
741; AVX512F-NEXT:    vzeroupper
742; AVX512F-NEXT:    retq
743;
744; AVX512VL-LABEL: fptoui_4f64_to_4i32:
745; AVX512VL:       # %bb.0:
746; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
747; AVX512VL-NEXT:    vzeroupper
748; AVX512VL-NEXT:    retq
749;
750; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
751; AVX512DQ:       # %bb.0:
752; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
753; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
754; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
755; AVX512DQ-NEXT:    vzeroupper
756; AVX512DQ-NEXT:    retq
757;
758; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
759; AVX512VLDQ:       # %bb.0:
760; AVX512VLDQ-NEXT:    vcvttpd2udq %ymm0, %xmm0
761; AVX512VLDQ-NEXT:    vzeroupper
762; AVX512VLDQ-NEXT:    retq
763  %cvt = fptoui <4 x double> %a to <4 x i32>
764  ret <4 x i32> %cvt
765}
766
767;
768; Float to Signed Integer
769;
770
771define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
772; SSE-LABEL: fptosi_2f32_to_2i32:
773; SSE:       # %bb.0:
774; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
775; SSE-NEXT:    retq
776;
777; AVX-LABEL: fptosi_2f32_to_2i32:
778; AVX:       # %bb.0:
779; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
780; AVX-NEXT:    retq
781  %cvt = fptosi <2 x float> %a to <2 x i32>
782  ret <2 x i32> %cvt
783}
784
785define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
786; SSE-LABEL: fptosi_4f32_to_4i32:
787; SSE:       # %bb.0:
788; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
789; SSE-NEXT:    retq
790;
791; AVX-LABEL: fptosi_4f32_to_4i32:
792; AVX:       # %bb.0:
793; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
794; AVX-NEXT:    retq
795  %cvt = fptosi <4 x float> %a to <4 x i32>
796  ret <4 x i32> %cvt
797}
798
; Signed conversion of the low two f32 lanes of %a to i64 lanes.
; Per the assertions below, configurations without AVX512DQ convert each lane
; with a scalar (v)cvttss2si and repack the results, while the AVX512DQ
; configurations use a packed vcvttps2qq.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 and 1 of %a are read), so it is written as poison instead of the
; deprecated undef placeholder.
799define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
800; SSE-LABEL: fptosi_2f32_to_2i64:
801; SSE:       # %bb.0:
802; SSE-NEXT:    cvttss2si %xmm0, %rax
803; SSE-NEXT:    movq %rax, %xmm1
804; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
805; SSE-NEXT:    cvttss2si %xmm0, %rax
806; SSE-NEXT:    movq %rax, %xmm0
807; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
808; SSE-NEXT:    movdqa %xmm1, %xmm0
809; SSE-NEXT:    retq
810;
811; VEX-LABEL: fptosi_2f32_to_2i64:
812; VEX:       # %bb.0:
813; VEX-NEXT:    vcvttss2si %xmm0, %rax
814; VEX-NEXT:    vmovq %rax, %xmm1
815; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
816; VEX-NEXT:    vcvttss2si %xmm0, %rax
817; VEX-NEXT:    vmovq %rax, %xmm0
818; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
819; VEX-NEXT:    retq
820;
821; AVX512F-LABEL: fptosi_2f32_to_2i64:
822; AVX512F:       # %bb.0:
823; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
824; AVX512F-NEXT:    vmovq %rax, %xmm1
825; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
826; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
827; AVX512F-NEXT:    vmovq %rax, %xmm0
828; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
829; AVX512F-NEXT:    retq
830;
831; AVX512VL-LABEL: fptosi_2f32_to_2i64:
832; AVX512VL:       # %bb.0:
833; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
834; AVX512VL-NEXT:    vmovq %rax, %xmm1
835; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
836; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
837; AVX512VL-NEXT:    vmovq %rax, %xmm0
838; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
839; AVX512VL-NEXT:    retq
840;
841; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
842; AVX512DQ:       # %bb.0:
843; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
844; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
845; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
846; AVX512DQ-NEXT:    vzeroupper
847; AVX512DQ-NEXT:    retq
848;
849; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
850; AVX512VLDQ:       # %bb.0:
851; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
852; AVX512VLDQ-NEXT:    retq
853  %shuf = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 1>
854  %cvt = fptosi <2 x float> %shuf to <2 x i64>
855  ret <2 x i64> %cvt
856}
857
; Signed conversion of all four f32 lanes of %a to i64, of which only the low
; two result lanes are returned.
; Per the assertions below, configurations without AVX512DQ emit just two
; scalar (v)cvttss2si conversions, while the AVX512DQ configurations use a
; packed vcvttps2qq and return the low xmm part of the wider result.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 and 1 of %cvt are read), so it is written as poison instead of the
; deprecated undef placeholder.
858define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
859; SSE-LABEL: fptosi_4f32_to_2i64:
860; SSE:       # %bb.0:
861; SSE-NEXT:    cvttss2si %xmm0, %rax
862; SSE-NEXT:    movq %rax, %xmm1
863; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
864; SSE-NEXT:    cvttss2si %xmm0, %rax
865; SSE-NEXT:    movq %rax, %xmm0
866; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
867; SSE-NEXT:    movdqa %xmm1, %xmm0
868; SSE-NEXT:    retq
869;
870; VEX-LABEL: fptosi_4f32_to_2i64:
871; VEX:       # %bb.0:
872; VEX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
873; VEX-NEXT:    vcvttss2si %xmm1, %rax
874; VEX-NEXT:    vcvttss2si %xmm0, %rcx
875; VEX-NEXT:    vmovq %rcx, %xmm0
876; VEX-NEXT:    vmovq %rax, %xmm1
877; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
878; VEX-NEXT:    retq
879;
880; AVX512F-LABEL: fptosi_4f32_to_2i64:
881; AVX512F:       # %bb.0:
882; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
883; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
884; AVX512F-NEXT:    vcvttss2si %xmm0, %rcx
885; AVX512F-NEXT:    vmovq %rcx, %xmm0
886; AVX512F-NEXT:    vmovq %rax, %xmm1
887; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
888; AVX512F-NEXT:    retq
889;
890; AVX512VL-LABEL: fptosi_4f32_to_2i64:
891; AVX512VL:       # %bb.0:
892; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
893; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
894; AVX512VL-NEXT:    vcvttss2si %xmm0, %rcx
895; AVX512VL-NEXT:    vmovq %rcx, %xmm0
896; AVX512VL-NEXT:    vmovq %rax, %xmm1
897; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
898; AVX512VL-NEXT:    retq
899;
900; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
901; AVX512DQ:       # %bb.0:
902; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
903; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
904; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
905; AVX512DQ-NEXT:    vzeroupper
906; AVX512DQ-NEXT:    retq
907;
908; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
909; AVX512VLDQ:       # %bb.0:
910; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
911; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
912; AVX512VLDQ-NEXT:    vzeroupper
913; AVX512VLDQ-NEXT:    retq
914  %cvt = fptosi <4 x float> %a to <4 x i64>
915  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
916  ret <2 x i64> %shuf
917}
918
; Signed conversion of all eight f32 lanes of %a to i32 lanes.
; The assertions below expect two 128-bit cvttps2dq conversions (xmm0 and
; xmm1) without AVX, and one 256-bit vcvttps2dq on ymm0 with AVX.
919define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
920; SSE-LABEL: fptosi_8f32_to_8i32:
921; SSE:       # %bb.0:
922; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
923; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
924; SSE-NEXT:    retq
925;
926; AVX-LABEL: fptosi_8f32_to_8i32:
927; AVX:       # %bb.0:
928; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
929; AVX-NEXT:    retq
930  %cvt = fptosi <8 x float> %a to <8 x i32>
931  ret <8 x i32> %cvt
932}
933
; Signed conversion of the low four f32 lanes of the 8-lane input %a to i64.
; Per the assertions below, configurations without AVX512DQ convert each of
; the four lanes with a scalar (v)cvttss2si and rebuild the vector from the
; results, while the AVX512DQ configurations use a single packed vcvttps2qq.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 through 3 of %a are read), so it is written as poison instead of the
; deprecated undef placeholder.
934define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
935; SSE-LABEL: fptosi_4f32_to_4i64:
936; SSE:       # %bb.0:
937; SSE-NEXT:    cvttss2si %xmm0, %rax
938; SSE-NEXT:    movq %rax, %xmm2
939; SSE-NEXT:    movaps %xmm0, %xmm1
940; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
941; SSE-NEXT:    cvttss2si %xmm1, %rax
942; SSE-NEXT:    movq %rax, %xmm1
943; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
944; SSE-NEXT:    movaps %xmm0, %xmm1
945; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
946; SSE-NEXT:    cvttss2si %xmm1, %rax
947; SSE-NEXT:    movq %rax, %xmm3
948; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
949; SSE-NEXT:    cvttss2si %xmm0, %rax
950; SSE-NEXT:    movq %rax, %xmm1
951; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
952; SSE-NEXT:    movdqa %xmm2, %xmm0
953; SSE-NEXT:    retq
954;
955; AVX1-LABEL: fptosi_4f32_to_4i64:
956; AVX1:       # %bb.0:
957; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
958; AVX1-NEXT:    vcvttss2si %xmm1, %rax
959; AVX1-NEXT:    vmovq %rax, %xmm1
960; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
961; AVX1-NEXT:    vcvttss2si %xmm2, %rax
962; AVX1-NEXT:    vmovq %rax, %xmm2
963; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
964; AVX1-NEXT:    vcvttss2si %xmm0, %rax
965; AVX1-NEXT:    vmovq %rax, %xmm2
966; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
967; AVX1-NEXT:    vcvttss2si %xmm0, %rax
968; AVX1-NEXT:    vmovq %rax, %xmm0
969; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
970; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
971; AVX1-NEXT:    retq
972;
973; AVX2-LABEL: fptosi_4f32_to_4i64:
974; AVX2:       # %bb.0:
975; AVX2-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
976; AVX2-NEXT:    vcvttss2si %xmm1, %rax
977; AVX2-NEXT:    vmovq %rax, %xmm1
978; AVX2-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
979; AVX2-NEXT:    vcvttss2si %xmm2, %rax
980; AVX2-NEXT:    vmovq %rax, %xmm2
981; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
982; AVX2-NEXT:    vcvttss2si %xmm0, %rax
983; AVX2-NEXT:    vmovq %rax, %xmm2
984; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
985; AVX2-NEXT:    vcvttss2si %xmm0, %rax
986; AVX2-NEXT:    vmovq %rax, %xmm0
987; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
988; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
989; AVX2-NEXT:    retq
990;
991; AVX512F-LABEL: fptosi_4f32_to_4i64:
992; AVX512F:       # %bb.0:
993; AVX512F-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
994; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
995; AVX512F-NEXT:    vmovq %rax, %xmm1
996; AVX512F-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
997; AVX512F-NEXT:    vcvttss2si %xmm2, %rax
998; AVX512F-NEXT:    vmovq %rax, %xmm2
999; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1000; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
1001; AVX512F-NEXT:    vmovq %rax, %xmm2
1002; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1003; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
1004; AVX512F-NEXT:    vmovq %rax, %xmm0
1005; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1006; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1007; AVX512F-NEXT:    retq
1008;
1009; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1010; AVX512VL:       # %bb.0:
1011; AVX512VL-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1012; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
1013; AVX512VL-NEXT:    vmovq %rax, %xmm1
1014; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1015; AVX512VL-NEXT:    vcvttss2si %xmm2, %rax
1016; AVX512VL-NEXT:    vmovq %rax, %xmm2
1017; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1018; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
1019; AVX512VL-NEXT:    vmovq %rax, %xmm2
1020; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1021; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
1022; AVX512VL-NEXT:    vmovq %rax, %xmm0
1023; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1024; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1025; AVX512VL-NEXT:    retq
1026;
1027; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1028; AVX512DQ:       # %bb.0:
1029; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1030; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1031; AVX512DQ-NEXT:    retq
1032;
1033; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1034; AVX512VLDQ:       # %bb.0:
1035; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
1036; AVX512VLDQ-NEXT:    retq
1037  %shuf = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1038  %cvt = fptosi <4 x float> %shuf to <4 x i64>
1039  ret <4 x i64> %cvt
1040}
1041
; Signed conversion of all eight f32 lanes of %a to i64, of which only the
; low four result lanes are returned.
; Per the assertions below, configurations without AVX512DQ emit just four
; scalar (v)cvttss2si conversions, while both AVX512DQ configurations use a
; packed vcvttps2qq from ymm0 to zmm0 and return the low ymm part.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 through 3 of %cvt are read), so it is written as poison instead of the
; deprecated undef placeholder.
1042define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1043; SSE-LABEL: fptosi_8f32_to_4i64:
1044; SSE:       # %bb.0:
1045; SSE-NEXT:    cvttss2si %xmm0, %rax
1046; SSE-NEXT:    movq %rax, %xmm2
1047; SSE-NEXT:    movaps %xmm0, %xmm1
1048; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
1049; SSE-NEXT:    cvttss2si %xmm1, %rax
1050; SSE-NEXT:    movq %rax, %xmm1
1051; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1052; SSE-NEXT:    movaps %xmm0, %xmm1
1053; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
1054; SSE-NEXT:    cvttss2si %xmm1, %rax
1055; SSE-NEXT:    movq %rax, %xmm3
1056; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1057; SSE-NEXT:    cvttss2si %xmm0, %rax
1058; SSE-NEXT:    movq %rax, %xmm1
1059; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1060; SSE-NEXT:    movdqa %xmm2, %xmm0
1061; SSE-NEXT:    retq
1062;
1063; AVX1-LABEL: fptosi_8f32_to_4i64:
1064; AVX1:       # %bb.0:
1065; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1066; AVX1-NEXT:    vcvttss2si %xmm1, %rax
1067; AVX1-NEXT:    vmovq %rax, %xmm1
1068; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1069; AVX1-NEXT:    vcvttss2si %xmm2, %rax
1070; AVX1-NEXT:    vmovq %rax, %xmm2
1071; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1072; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1073; AVX1-NEXT:    vmovq %rax, %xmm2
1074; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1075; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1076; AVX1-NEXT:    vmovq %rax, %xmm0
1077; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1078; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1079; AVX1-NEXT:    retq
1080;
1081; AVX2-LABEL: fptosi_8f32_to_4i64:
1082; AVX2:       # %bb.0:
1083; AVX2-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1084; AVX2-NEXT:    vcvttss2si %xmm1, %rax
1085; AVX2-NEXT:    vmovq %rax, %xmm1
1086; AVX2-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1087; AVX2-NEXT:    vcvttss2si %xmm2, %rax
1088; AVX2-NEXT:    vmovq %rax, %xmm2
1089; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1090; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1091; AVX2-NEXT:    vmovq %rax, %xmm2
1092; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1093; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1094; AVX2-NEXT:    vmovq %rax, %xmm0
1095; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1096; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1097; AVX2-NEXT:    retq
1098;
1099; AVX512F-LABEL: fptosi_8f32_to_4i64:
1100; AVX512F:       # %bb.0:
1101; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1102; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
1103; AVX512F-NEXT:    vcvttss2si %xmm0, %rcx
1104; AVX512F-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1105; AVX512F-NEXT:    vcvttss2si %xmm1, %rdx
1106; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1107; AVX512F-NEXT:    vcvttss2si %xmm0, %rsi
1108; AVX512F-NEXT:    vmovq %rsi, %xmm0
1109; AVX512F-NEXT:    vmovq %rdx, %xmm1
1110; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1111; AVX512F-NEXT:    vmovq %rcx, %xmm1
1112; AVX512F-NEXT:    vmovq %rax, %xmm2
1113; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1114; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1115; AVX512F-NEXT:    retq
1116;
1117; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1118; AVX512VL:       # %bb.0:
1119; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1120; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
1121; AVX512VL-NEXT:    vcvttss2si %xmm0, %rcx
1122; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1123; AVX512VL-NEXT:    vcvttss2si %xmm1, %rdx
1124; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1125; AVX512VL-NEXT:    vcvttss2si %xmm0, %rsi
1126; AVX512VL-NEXT:    vmovq %rsi, %xmm0
1127; AVX512VL-NEXT:    vmovq %rdx, %xmm1
1128; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1129; AVX512VL-NEXT:    vmovq %rcx, %xmm1
1130; AVX512VL-NEXT:    vmovq %rax, %xmm2
1131; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1132; AVX512VL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1133; AVX512VL-NEXT:    retq
1134;
1135; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1136; AVX512DQ:       # %bb.0:
1137; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1138; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1139; AVX512DQ-NEXT:    retq
1140;
1141; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1142; AVX512VLDQ:       # %bb.0:
1143; AVX512VLDQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1144; AVX512VLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1145; AVX512VLDQ-NEXT:    retq
1146  %cvt = fptosi <8 x float> %a to <8 x i64>
1147  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1148  ret <4 x i64> %shuf
1149}
1150
1151;
1152; Float to Unsigned Integer
1153;
1154
; Unsigned conversion of a <2 x float> argument to <2 x i32>.
; Per the assertions below, the SSE2, AVX1 and AVX2 configurations build the
; result from two signed cvttps2dq conversions (of the input, and of the input
; minus a constant shown as 2.14748365E+9 in the AVX2 broadcast) merged with
; psrad/pand/por. All AVX512 configurations use vcvttps2udq; those without
; the VL feature run it on zmm0 and then emit vzeroupper.
1155define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1156; SSE-LABEL: fptoui_2f32_to_2i32:
1157; SSE:       # %bb.0:
1158; SSE-NEXT:    cvttps2dq %xmm0, %xmm1
1159; SSE-NEXT:    movdqa %xmm1, %xmm2
1160; SSE-NEXT:    psrad $31, %xmm2
1161; SSE-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1162; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
1163; SSE-NEXT:    pand %xmm2, %xmm0
1164; SSE-NEXT:    por %xmm1, %xmm0
1165; SSE-NEXT:    retq
1166;
1167; AVX1-LABEL: fptoui_2f32_to_2i32:
1168; AVX1:       # %bb.0:
1169; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm1
1170; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
1171; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1172; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
1173; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
1174; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
1175; AVX1-NEXT:    retq
1176;
1177; AVX2-LABEL: fptoui_2f32_to_2i32:
1178; AVX2:       # %bb.0:
1179; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1180; AVX2-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1181; AVX2-NEXT:    vcvttps2dq %xmm1, %xmm1
1182; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
1183; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
1184; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1185; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1186; AVX2-NEXT:    retq
1187;
1188; AVX512F-LABEL: fptoui_2f32_to_2i32:
1189; AVX512F:       # %bb.0:
1190; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1191; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1192; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1193; AVX512F-NEXT:    vzeroupper
1194; AVX512F-NEXT:    retq
1195;
1196; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1197; AVX512VL:       # %bb.0:
1198; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
1199; AVX512VL-NEXT:    retq
1200;
1201; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1202; AVX512DQ:       # %bb.0:
1203; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1204; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1205; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1206; AVX512DQ-NEXT:    vzeroupper
1207; AVX512DQ-NEXT:    retq
1208;
1209; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1210; AVX512VLDQ:       # %bb.0:
1211; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
1212; AVX512VLDQ-NEXT:    retq
1213  %cvt = fptoui <2 x float> %a to <2 x i32>
1214  ret <2 x i32> %cvt
1215}
1216
; Unsigned conversion of all four f32 lanes of %a to i32 lanes.
; Per the assertions below, the SSE2, AVX1 and AVX2 configurations build the
; result from two signed cvttps2dq conversions (of the input, and of the input
; minus a constant shown as 2.14748365E+9 in the AVX2 broadcast) merged with
; psrad/pand/por. All AVX512 configurations use vcvttps2udq; those without
; the VL feature run it on zmm0 and then emit vzeroupper.
1217define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1218; SSE-LABEL: fptoui_4f32_to_4i32:
1219; SSE:       # %bb.0:
1220; SSE-NEXT:    cvttps2dq %xmm0, %xmm1
1221; SSE-NEXT:    movdqa %xmm1, %xmm2
1222; SSE-NEXT:    psrad $31, %xmm2
1223; SSE-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1224; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
1225; SSE-NEXT:    pand %xmm2, %xmm0
1226; SSE-NEXT:    por %xmm1, %xmm0
1227; SSE-NEXT:    retq
1228;
1229; AVX1-LABEL: fptoui_4f32_to_4i32:
1230; AVX1:       # %bb.0:
1231; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm1
1232; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
1233; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1234; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
1235; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
1236; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
1237; AVX1-NEXT:    retq
1238;
1239; AVX2-LABEL: fptoui_4f32_to_4i32:
1240; AVX2:       # %bb.0:
1241; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1242; AVX2-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1243; AVX2-NEXT:    vcvttps2dq %xmm1, %xmm1
1244; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
1245; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
1246; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1247; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1248; AVX2-NEXT:    retq
1249;
1250; AVX512F-LABEL: fptoui_4f32_to_4i32:
1251; AVX512F:       # %bb.0:
1252; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1253; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1254; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1255; AVX512F-NEXT:    vzeroupper
1256; AVX512F-NEXT:    retq
1257;
1258; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1259; AVX512VL:       # %bb.0:
1260; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
1261; AVX512VL-NEXT:    retq
1262;
1263; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1264; AVX512DQ:       # %bb.0:
1265; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1266; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1267; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1268; AVX512DQ-NEXT:    vzeroupper
1269; AVX512DQ-NEXT:    retq
1270;
1271; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1272; AVX512VLDQ:       # %bb.0:
1273; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
1274; AVX512VLDQ-NEXT:    retq
1275  %cvt = fptoui <4 x float> %a to <4 x i32>
1276  ret <4 x i32> %cvt
1277}
1278
; Unsigned conversion of the low two f32 lanes of %a to i64 lanes.
; Per the assertions below, the SSE2 and VEX configurations convert each lane
; with two signed (v)cvttss2si conversions (of the lane, and of the lane minus
; a constant shown as 9.22337203E+18) merged with sarq/andq/orq. AVX512F and
; AVX512VL use the scalar vcvttss2usi, and the AVX512DQ configurations use a
; packed vcvttps2uqq.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 and 1 of %a are read), so it is written as poison instead of the
; deprecated undef placeholder.
1279define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1280; SSE-LABEL: fptoui_2f32_to_2i64:
1281; SSE:       # %bb.0:
1282; SSE-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1283; SSE-NEXT:    movaps %xmm0, %xmm1
1284; SSE-NEXT:    subss %xmm2, %xmm1
1285; SSE-NEXT:    cvttss2si %xmm1, %rax
1286; SSE-NEXT:    cvttss2si %xmm0, %rcx
1287; SSE-NEXT:    movq %rcx, %rdx
1288; SSE-NEXT:    sarq $63, %rdx
1289; SSE-NEXT:    andq %rax, %rdx
1290; SSE-NEXT:    orq %rcx, %rdx
1291; SSE-NEXT:    movq %rdx, %xmm1
1292; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1293; SSE-NEXT:    cvttss2si %xmm0, %rax
1294; SSE-NEXT:    subss %xmm2, %xmm0
1295; SSE-NEXT:    cvttss2si %xmm0, %rcx
1296; SSE-NEXT:    movq %rax, %rdx
1297; SSE-NEXT:    sarq $63, %rdx
1298; SSE-NEXT:    andq %rcx, %rdx
1299; SSE-NEXT:    orq %rax, %rdx
1300; SSE-NEXT:    movq %rdx, %xmm0
1301; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1302; SSE-NEXT:    movdqa %xmm1, %xmm0
1303; SSE-NEXT:    retq
1304;
1305; VEX-LABEL: fptoui_2f32_to_2i64:
1306; VEX:       # %bb.0:
1307; VEX-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1308; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
1309; VEX-NEXT:    vcvttss2si %xmm2, %rax
1310; VEX-NEXT:    vcvttss2si %xmm0, %rcx
1311; VEX-NEXT:    movq %rcx, %rdx
1312; VEX-NEXT:    sarq $63, %rdx
1313; VEX-NEXT:    andq %rax, %rdx
1314; VEX-NEXT:    orq %rcx, %rdx
1315; VEX-NEXT:    vmovq %rdx, %xmm2
1316; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1317; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
1318; VEX-NEXT:    vcvttss2si %xmm1, %rax
1319; VEX-NEXT:    vcvttss2si %xmm0, %rcx
1320; VEX-NEXT:    movq %rcx, %rdx
1321; VEX-NEXT:    sarq $63, %rdx
1322; VEX-NEXT:    andq %rax, %rdx
1323; VEX-NEXT:    orq %rcx, %rdx
1324; VEX-NEXT:    vmovq %rdx, %xmm0
1325; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1326; VEX-NEXT:    retq
1327;
1328; AVX512F-LABEL: fptoui_2f32_to_2i64:
1329; AVX512F:       # %bb.0:
1330; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1331; AVX512F-NEXT:    vmovq %rax, %xmm1
1332; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1333; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1334; AVX512F-NEXT:    vmovq %rax, %xmm0
1335; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1336; AVX512F-NEXT:    retq
1337;
1338; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1339; AVX512VL:       # %bb.0:
1340; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1341; AVX512VL-NEXT:    vmovq %rax, %xmm1
1342; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1343; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1344; AVX512VL-NEXT:    vmovq %rax, %xmm0
1345; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1346; AVX512VL-NEXT:    retq
1347;
1348; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1349; AVX512DQ:       # %bb.0:
1350; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1351; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1352; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1353; AVX512DQ-NEXT:    vzeroupper
1354; AVX512DQ-NEXT:    retq
1355;
1356; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1357; AVX512VLDQ:       # %bb.0:
1358; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
1359; AVX512VLDQ-NEXT:    retq
1360  %shuf = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1361  %cvt = fptoui <2 x float> %shuf to <2 x i64>
1362  ret <2 x i64> %cvt
1363}
1364
; Unsigned conversion of all four f32 lanes of %a to i64, of which only the
; low two result lanes are returned.
; Per the assertions below, the SSE2 and VEX configurations convert just two
; lanes, each with two signed (v)cvttss2si conversions (of the lane, and of
; the lane minus a constant shown as 9.22337203E+18) merged with
; sarq/andq/orq. AVX512F and AVX512VL use the scalar vcvttss2usi, and the
; AVX512DQ configurations use a packed vcvttps2uqq and return the low xmm part.
; The second shufflevector operand is never selected by the mask (only lanes
; 0 and 1 of %cvt are read), so it is written as poison instead of the
; deprecated undef placeholder.
1365define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1366; SSE-LABEL: fptoui_4f32_to_2i64:
1367; SSE:       # %bb.0:
1368; SSE-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1369; SSE-NEXT:    movaps %xmm0, %xmm1
1370; SSE-NEXT:    subss %xmm2, %xmm1
1371; SSE-NEXT:    cvttss2si %xmm1, %rax
1372; SSE-NEXT:    cvttss2si %xmm0, %rcx
1373; SSE-NEXT:    movq %rcx, %rdx
1374; SSE-NEXT:    sarq $63, %rdx
1375; SSE-NEXT:    andq %rax, %rdx
1376; SSE-NEXT:    orq %rcx, %rdx
1377; SSE-NEXT:    movq %rdx, %xmm1
1378; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1379; SSE-NEXT:    cvttss2si %xmm0, %rax
1380; SSE-NEXT:    subss %xmm2, %xmm0
1381; SSE-NEXT:    cvttss2si %xmm0, %rcx
1382; SSE-NEXT:    movq %rax, %rdx
1383; SSE-NEXT:    sarq $63, %rdx
1384; SSE-NEXT:    andq %rcx, %rdx
1385; SSE-NEXT:    orq %rax, %rdx
1386; SSE-NEXT:    movq %rdx, %xmm0
1387; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1388; SSE-NEXT:    movdqa %xmm1, %xmm0
1389; SSE-NEXT:    retq
1390;
1391; VEX-LABEL: fptoui_4f32_to_2i64:
1392; VEX:       # %bb.0:
1393; VEX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1394; VEX-NEXT:    vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1395; VEX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
1396; VEX-NEXT:    vcvttss2si %xmm3, %rax
1397; VEX-NEXT:    vcvttss2si %xmm1, %rcx
1398; VEX-NEXT:    movq %rcx, %rdx
1399; VEX-NEXT:    sarq $63, %rdx
1400; VEX-NEXT:    andq %rax, %rdx
1401; VEX-NEXT:    orq %rcx, %rdx
1402; VEX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
1403; VEX-NEXT:    vcvttss2si %xmm1, %rax
1404; VEX-NEXT:    vcvttss2si %xmm0, %rcx
1405; VEX-NEXT:    movq %rcx, %rsi
1406; VEX-NEXT:    sarq $63, %rsi
1407; VEX-NEXT:    andq %rax, %rsi
1408; VEX-NEXT:    orq %rcx, %rsi
1409; VEX-NEXT:    vmovq %rsi, %xmm0
1410; VEX-NEXT:    vmovq %rdx, %xmm1
1411; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1412; VEX-NEXT:    retq
1413;
1414; AVX512F-LABEL: fptoui_4f32_to_2i64:
1415; AVX512F:       # %bb.0:
1416; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1417; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1418; AVX512F-NEXT:    vcvttss2usi %xmm0, %rcx
1419; AVX512F-NEXT:    vmovq %rcx, %xmm0
1420; AVX512F-NEXT:    vmovq %rax, %xmm1
1421; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1422; AVX512F-NEXT:    retq
1423;
1424; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1425; AVX512VL:       # %bb.0:
1426; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1427; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1428; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rcx
1429; AVX512VL-NEXT:    vmovq %rcx, %xmm0
1430; AVX512VL-NEXT:    vmovq %rax, %xmm1
1431; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1432; AVX512VL-NEXT:    retq
1433;
1434; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1435; AVX512DQ:       # %bb.0:
1436; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1437; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1438; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1439; AVX512DQ-NEXT:    vzeroupper
1440; AVX512DQ-NEXT:    retq
1441;
1442; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1443; AVX512VLDQ:       # %bb.0:
1444; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %ymm0
1445; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1446; AVX512VLDQ-NEXT:    vzeroupper
1447; AVX512VLDQ-NEXT:    retq
1448  %cvt = fptoui <4 x float> %a to <4 x i64>
1449  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
1450  ret <2 x i64> %shuf
1451}
1452
; Unsigned conversion of all eight f32 lanes of %a to i32 lanes.
; Per the assertions below, every configuration without AVX512 builds the
; result from two signed cvttps2dq conversions (of the input, and of the input
; minus a constant shown as 2.14748365E+9 where it is printed). SSE2 does this
; once per 128-bit half and merges with psrad/pand/por, AVX1 merges the 256-bit
; values with vorps/vblendvps, and AVX2 merges with vpsrad/vpand/vpor.
; All AVX512 configurations use vcvttps2udq, on zmm0 without the VL feature
; and on ymm0 with it.
1453define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1454; SSE-LABEL: fptoui_8f32_to_8i32:
1455; SSE:       # %bb.0:
1456; SSE-NEXT:    movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1457; SSE-NEXT:    cvttps2dq %xmm0, %xmm3
1458; SSE-NEXT:    subps %xmm2, %xmm0
1459; SSE-NEXT:    cvttps2dq %xmm0, %xmm4
1460; SSE-NEXT:    movdqa %xmm3, %xmm0
1461; SSE-NEXT:    psrad $31, %xmm0
1462; SSE-NEXT:    pand %xmm4, %xmm0
1463; SSE-NEXT:    por %xmm3, %xmm0
1464; SSE-NEXT:    cvttps2dq %xmm1, %xmm3
1465; SSE-NEXT:    subps %xmm2, %xmm1
1466; SSE-NEXT:    cvttps2dq %xmm1, %xmm2
1467; SSE-NEXT:    movdqa %xmm3, %xmm1
1468; SSE-NEXT:    psrad $31, %xmm1
1469; SSE-NEXT:    pand %xmm2, %xmm1
1470; SSE-NEXT:    por %xmm3, %xmm1
1471; SSE-NEXT:    retq
1472;
1473; AVX1-LABEL: fptoui_8f32_to_8i32:
1474; AVX1:       # %bb.0:
1475; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm1
1476; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1477; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
1478; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
1479; AVX1-NEXT:    vblendvps %ymm1, %ymm0, %ymm1, %ymm0
1480; AVX1-NEXT:    retq
1481;
1482; AVX2-LABEL: fptoui_8f32_to_8i32:
1483; AVX2:       # %bb.0:
1484; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1485; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm1
1486; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
1487; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
1488; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm2
1489; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
1490; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1491; AVX2-NEXT:    retq
1492;
1493; AVX512F-LABEL: fptoui_8f32_to_8i32:
1494; AVX512F:       # %bb.0:
1495; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1496; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1497; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1498; AVX512F-NEXT:    retq
1499;
1500; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1501; AVX512VL:       # %bb.0:
1502; AVX512VL-NEXT:    vcvttps2udq %ymm0, %ymm0
1503; AVX512VL-NEXT:    retq
1504;
1505; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1506; AVX512DQ:       # %bb.0:
1507; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1508; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1509; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1510; AVX512DQ-NEXT:    retq
1511;
1512; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1513; AVX512VLDQ:       # %bb.0:
1514; AVX512VLDQ-NEXT:    vcvttps2udq %ymm0, %ymm0
1515; AVX512VLDQ-NEXT:    retq
1516  %cvt = fptoui <8 x float> %a to <8 x i32>
1517  ret <8 x i32> %cvt
1518}
1519
1520define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
1521; SSE-LABEL: fptoui_4f32_to_4i64:
1522; SSE:       # %bb.0:
1523; SSE-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1524; SSE-NEXT:    movaps %xmm0, %xmm2
1525; SSE-NEXT:    subss %xmm1, %xmm2
1526; SSE-NEXT:    cvttss2si %xmm2, %rax
1527; SSE-NEXT:    cvttss2si %xmm0, %rcx
1528; SSE-NEXT:    movq %rcx, %rdx
1529; SSE-NEXT:    sarq $63, %rdx
1530; SSE-NEXT:    andq %rax, %rdx
1531; SSE-NEXT:    orq %rcx, %rdx
1532; SSE-NEXT:    movq %rdx, %xmm2
1533; SSE-NEXT:    movaps %xmm0, %xmm3
1534; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1535; SSE-NEXT:    cvttss2si %xmm3, %rax
1536; SSE-NEXT:    subss %xmm1, %xmm3
1537; SSE-NEXT:    cvttss2si %xmm3, %rcx
1538; SSE-NEXT:    movq %rax, %rdx
1539; SSE-NEXT:    sarq $63, %rdx
1540; SSE-NEXT:    andq %rcx, %rdx
1541; SSE-NEXT:    orq %rax, %rdx
1542; SSE-NEXT:    movq %rdx, %xmm3
1543; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1544; SSE-NEXT:    movaps %xmm0, %xmm3
1545; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1546; SSE-NEXT:    cvttss2si %xmm3, %rax
1547; SSE-NEXT:    subss %xmm1, %xmm3
1548; SSE-NEXT:    cvttss2si %xmm3, %rcx
1549; SSE-NEXT:    movq %rax, %rdx
1550; SSE-NEXT:    sarq $63, %rdx
1551; SSE-NEXT:    andq %rcx, %rdx
1552; SSE-NEXT:    orq %rax, %rdx
1553; SSE-NEXT:    movq %rdx, %xmm3
1554; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1555; SSE-NEXT:    cvttss2si %xmm0, %rax
1556; SSE-NEXT:    subss %xmm1, %xmm0
1557; SSE-NEXT:    cvttss2si %xmm0, %rcx
1558; SSE-NEXT:    movq %rax, %rdx
1559; SSE-NEXT:    sarq $63, %rdx
1560; SSE-NEXT:    andq %rcx, %rdx
1561; SSE-NEXT:    orq %rax, %rdx
1562; SSE-NEXT:    movq %rdx, %xmm1
1563; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1564; SSE-NEXT:    movdqa %xmm2, %xmm0
1565; SSE-NEXT:    retq
1566;
1567; AVX1-LABEL: fptoui_4f32_to_4i64:
1568; AVX1:       # %bb.0:
1569; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1570; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1571; AVX1-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1572; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1573; AVX1-NEXT:    vcvttss2si %xmm2, %rcx
1574; AVX1-NEXT:    movq %rcx, %rdx
1575; AVX1-NEXT:    sarq $63, %rdx
1576; AVX1-NEXT:    andq %rax, %rdx
1577; AVX1-NEXT:    orq %rcx, %rdx
1578; AVX1-NEXT:    vmovq %rdx, %xmm2
1579; AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1580; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1581; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1582; AVX1-NEXT:    vcvttss2si %xmm3, %rcx
1583; AVX1-NEXT:    movq %rcx, %rdx
1584; AVX1-NEXT:    sarq $63, %rdx
1585; AVX1-NEXT:    andq %rax, %rdx
1586; AVX1-NEXT:    orq %rcx, %rdx
1587; AVX1-NEXT:    vmovq %rdx, %xmm3
1588; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1589; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1590; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1591; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1592; AVX1-NEXT:    movq %rcx, %rdx
1593; AVX1-NEXT:    sarq $63, %rdx
1594; AVX1-NEXT:    andq %rax, %rdx
1595; AVX1-NEXT:    orq %rcx, %rdx
1596; AVX1-NEXT:    vmovq %rdx, %xmm3
1597; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1598; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm1
1599; AVX1-NEXT:    vcvttss2si %xmm1, %rax
1600; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1601; AVX1-NEXT:    movq %rcx, %rdx
1602; AVX1-NEXT:    sarq $63, %rdx
1603; AVX1-NEXT:    andq %rax, %rdx
1604; AVX1-NEXT:    orq %rcx, %rdx
1605; AVX1-NEXT:    vmovq %rdx, %xmm0
1606; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1607; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1608; AVX1-NEXT:    retq
1609;
1610; AVX2-LABEL: fptoui_4f32_to_4i64:
1611; AVX2:       # %bb.0:
1612; AVX2-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1613; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1614; AVX2-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1615; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1616; AVX2-NEXT:    vcvttss2si %xmm2, %rcx
1617; AVX2-NEXT:    movq %rcx, %rdx
1618; AVX2-NEXT:    sarq $63, %rdx
1619; AVX2-NEXT:    andq %rax, %rdx
1620; AVX2-NEXT:    orq %rcx, %rdx
1621; AVX2-NEXT:    vmovq %rdx, %xmm2
1622; AVX2-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1623; AVX2-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1624; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1625; AVX2-NEXT:    vcvttss2si %xmm3, %rcx
1626; AVX2-NEXT:    movq %rcx, %rdx
1627; AVX2-NEXT:    sarq $63, %rdx
1628; AVX2-NEXT:    andq %rax, %rdx
1629; AVX2-NEXT:    orq %rcx, %rdx
1630; AVX2-NEXT:    vmovq %rdx, %xmm3
1631; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1632; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1633; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1634; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1635; AVX2-NEXT:    movq %rcx, %rdx
1636; AVX2-NEXT:    sarq $63, %rdx
1637; AVX2-NEXT:    andq %rax, %rdx
1638; AVX2-NEXT:    orq %rcx, %rdx
1639; AVX2-NEXT:    vmovq %rdx, %xmm3
1640; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1641; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm1
1642; AVX2-NEXT:    vcvttss2si %xmm1, %rax
1643; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1644; AVX2-NEXT:    movq %rcx, %rdx
1645; AVX2-NEXT:    sarq $63, %rdx
1646; AVX2-NEXT:    andq %rax, %rdx
1647; AVX2-NEXT:    orq %rcx, %rdx
1648; AVX2-NEXT:    vmovq %rdx, %xmm0
1649; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1650; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1651; AVX2-NEXT:    retq
1652;
1653; AVX512F-LABEL: fptoui_4f32_to_4i64:
1654; AVX512F:       # %bb.0:
1655; AVX512F-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1656; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1657; AVX512F-NEXT:    vmovq %rax, %xmm1
1658; AVX512F-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1659; AVX512F-NEXT:    vcvttss2usi %xmm2, %rax
1660; AVX512F-NEXT:    vmovq %rax, %xmm2
1661; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1662; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1663; AVX512F-NEXT:    vmovq %rax, %xmm2
1664; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1665; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1666; AVX512F-NEXT:    vmovq %rax, %xmm0
1667; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1668; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1669; AVX512F-NEXT:    retq
1670;
1671; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1672; AVX512VL:       # %bb.0:
1673; AVX512VL-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1674; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1675; AVX512VL-NEXT:    vmovq %rax, %xmm1
1676; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1677; AVX512VL-NEXT:    vcvttss2usi %xmm2, %rax
1678; AVX512VL-NEXT:    vmovq %rax, %xmm2
1679; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1680; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1681; AVX512VL-NEXT:    vmovq %rax, %xmm2
1682; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1683; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1684; AVX512VL-NEXT:    vmovq %rax, %xmm0
1685; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1686; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1687; AVX512VL-NEXT:    retq
1688;
1689; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1690; AVX512DQ:       # %bb.0:
1691; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1692; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1693; AVX512DQ-NEXT:    retq
1694;
1695; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1696; AVX512VLDQ:       # %bb.0:
1697; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %ymm0
1698; AVX512VLDQ-NEXT:    retq
1699  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1700  %cvt = fptoui <4 x float> %shuf to <4 x i64>
1701  ret <4 x i64> %cvt
1702}
1703
; fptoui is applied to the whole <8 x float> input (producing <8 x i64>) and the
; shufflevector then keeps only result lanes 0-3.
; Expected lowering per check prefix:
;   SSE/AVX1/AVX2      - per-lane scalar sequence: two (v)cvttss2si conversions,
;                        one of them after subtracting the 9.22337203E+18
;                        constant, merged with sarq $63 / andq / orq.
;   AVX512F/AVX512VL   - one scalar vcvttss2usi per lane.
;   AVX512DQ/AVX512VLDQ - a single vcvttps2uqq %ymm0, %zmm0.
; The check lines are autogenerated (see the NOTE at the top of this file).
1704define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
1705; SSE-LABEL: fptoui_8f32_to_4i64:
1706; SSE:       # %bb.0:
1707; SSE-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1708; SSE-NEXT:    movaps %xmm0, %xmm2
1709; SSE-NEXT:    subss %xmm1, %xmm2
1710; SSE-NEXT:    cvttss2si %xmm2, %rax
1711; SSE-NEXT:    cvttss2si %xmm0, %rcx
1712; SSE-NEXT:    movq %rcx, %rdx
1713; SSE-NEXT:    sarq $63, %rdx
1714; SSE-NEXT:    andq %rax, %rdx
1715; SSE-NEXT:    orq %rcx, %rdx
1716; SSE-NEXT:    movq %rdx, %xmm2
1717; SSE-NEXT:    movaps %xmm0, %xmm3
1718; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1719; SSE-NEXT:    cvttss2si %xmm3, %rax
1720; SSE-NEXT:    subss %xmm1, %xmm3
1721; SSE-NEXT:    cvttss2si %xmm3, %rcx
1722; SSE-NEXT:    movq %rax, %rdx
1723; SSE-NEXT:    sarq $63, %rdx
1724; SSE-NEXT:    andq %rcx, %rdx
1725; SSE-NEXT:    orq %rax, %rdx
1726; SSE-NEXT:    movq %rdx, %xmm3
1727; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1728; SSE-NEXT:    movaps %xmm0, %xmm3
1729; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1730; SSE-NEXT:    cvttss2si %xmm3, %rax
1731; SSE-NEXT:    subss %xmm1, %xmm3
1732; SSE-NEXT:    cvttss2si %xmm3, %rcx
1733; SSE-NEXT:    movq %rax, %rdx
1734; SSE-NEXT:    sarq $63, %rdx
1735; SSE-NEXT:    andq %rcx, %rdx
1736; SSE-NEXT:    orq %rax, %rdx
1737; SSE-NEXT:    movq %rdx, %xmm3
1738; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1739; SSE-NEXT:    cvttss2si %xmm0, %rax
1740; SSE-NEXT:    subss %xmm1, %xmm0
1741; SSE-NEXT:    cvttss2si %xmm0, %rcx
1742; SSE-NEXT:    movq %rax, %rdx
1743; SSE-NEXT:    sarq $63, %rdx
1744; SSE-NEXT:    andq %rcx, %rdx
1745; SSE-NEXT:    orq %rax, %rdx
1746; SSE-NEXT:    movq %rdx, %xmm1
1747; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1748; SSE-NEXT:    movdqa %xmm2, %xmm0
1749; SSE-NEXT:    retq
1750;
1751; AVX1-LABEL: fptoui_8f32_to_4i64:
1752; AVX1:       # %bb.0:
1753; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1754; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1755; AVX1-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1756; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1757; AVX1-NEXT:    vcvttss2si %xmm2, %rcx
1758; AVX1-NEXT:    movq %rcx, %rdx
1759; AVX1-NEXT:    sarq $63, %rdx
1760; AVX1-NEXT:    andq %rax, %rdx
1761; AVX1-NEXT:    orq %rcx, %rdx
1762; AVX1-NEXT:    vmovq %rdx, %xmm2
1763; AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1764; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1765; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1766; AVX1-NEXT:    vcvttss2si %xmm3, %rcx
1767; AVX1-NEXT:    movq %rcx, %rdx
1768; AVX1-NEXT:    sarq $63, %rdx
1769; AVX1-NEXT:    andq %rax, %rdx
1770; AVX1-NEXT:    orq %rcx, %rdx
1771; AVX1-NEXT:    vmovq %rdx, %xmm3
1772; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1773; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1774; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1775; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1776; AVX1-NEXT:    movq %rcx, %rdx
1777; AVX1-NEXT:    sarq $63, %rdx
1778; AVX1-NEXT:    andq %rax, %rdx
1779; AVX1-NEXT:    orq %rcx, %rdx
1780; AVX1-NEXT:    vmovq %rdx, %xmm3
1781; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1782; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm1
1783; AVX1-NEXT:    vcvttss2si %xmm1, %rax
1784; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1785; AVX1-NEXT:    movq %rcx, %rdx
1786; AVX1-NEXT:    sarq $63, %rdx
1787; AVX1-NEXT:    andq %rax, %rdx
1788; AVX1-NEXT:    orq %rcx, %rdx
1789; AVX1-NEXT:    vmovq %rdx, %xmm0
1790; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1791; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1792; AVX1-NEXT:    retq
1793;
1794; AVX2-LABEL: fptoui_8f32_to_4i64:
1795; AVX2:       # %bb.0:
1796; AVX2-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1797; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1798; AVX2-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1799; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1800; AVX2-NEXT:    vcvttss2si %xmm2, %rcx
1801; AVX2-NEXT:    movq %rcx, %rdx
1802; AVX2-NEXT:    sarq $63, %rdx
1803; AVX2-NEXT:    andq %rax, %rdx
1804; AVX2-NEXT:    orq %rcx, %rdx
1805; AVX2-NEXT:    vmovq %rdx, %xmm2
1806; AVX2-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1807; AVX2-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1808; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1809; AVX2-NEXT:    vcvttss2si %xmm3, %rcx
1810; AVX2-NEXT:    movq %rcx, %rdx
1811; AVX2-NEXT:    sarq $63, %rdx
1812; AVX2-NEXT:    andq %rax, %rdx
1813; AVX2-NEXT:    orq %rcx, %rdx
1814; AVX2-NEXT:    vmovq %rdx, %xmm3
1815; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1816; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1817; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1818; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1819; AVX2-NEXT:    movq %rcx, %rdx
1820; AVX2-NEXT:    sarq $63, %rdx
1821; AVX2-NEXT:    andq %rax, %rdx
1822; AVX2-NEXT:    orq %rcx, %rdx
1823; AVX2-NEXT:    vmovq %rdx, %xmm3
1824; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1825; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm1
1826; AVX2-NEXT:    vcvttss2si %xmm1, %rax
1827; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1828; AVX2-NEXT:    movq %rcx, %rdx
1829; AVX2-NEXT:    sarq $63, %rdx
1830; AVX2-NEXT:    andq %rax, %rdx
1831; AVX2-NEXT:    orq %rcx, %rdx
1832; AVX2-NEXT:    vmovq %rdx, %xmm0
1833; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1834; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1835; AVX2-NEXT:    retq
1836;
1837; AVX512F-LABEL: fptoui_8f32_to_4i64:
1838; AVX512F:       # %bb.0:
1839; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1840; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1841; AVX512F-NEXT:    vcvttss2usi %xmm0, %rcx
1842; AVX512F-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1843; AVX512F-NEXT:    vcvttss2usi %xmm1, %rdx
1844; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1845; AVX512F-NEXT:    vcvttss2usi %xmm0, %rsi
1846; AVX512F-NEXT:    vmovq %rsi, %xmm0
1847; AVX512F-NEXT:    vmovq %rdx, %xmm1
1848; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1849; AVX512F-NEXT:    vmovq %rcx, %xmm1
1850; AVX512F-NEXT:    vmovq %rax, %xmm2
1851; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1852; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1853; AVX512F-NEXT:    retq
1854;
1855; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1856; AVX512VL:       # %bb.0:
1857; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1858; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1859; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rcx
1860; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1861; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rdx
1862; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1863; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rsi
1864; AVX512VL-NEXT:    vmovq %rsi, %xmm0
1865; AVX512VL-NEXT:    vmovq %rdx, %xmm1
1866; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1867; AVX512VL-NEXT:    vmovq %rcx, %xmm1
1868; AVX512VL-NEXT:    vmovq %rax, %xmm2
1869; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1870; AVX512VL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1871; AVX512VL-NEXT:    retq
1872;
1873; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1874; AVX512DQ:       # %bb.0:
1875; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1876; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1877; AVX512DQ-NEXT:    retq
1878;
1879; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1880; AVX512VLDQ:       # %bb.0:
1881; AVX512VLDQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1882; AVX512VLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1883; AVX512VLDQ-NEXT:    retq
1884  %cvt = fptoui <8 x float> %a to <8 x i64>
1885  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1886  ret <4 x i64> %shuf
1887}
1888
1889;
1890; Constant Folding
1891;
1892
; fptosi of the constant vector <1.0, -1.0> to <2 x i64>. Every prefix expects
; the conversion to be folded away into one constant materialization of
; [1,18446744073709551615] (the second lane is -1 printed as an unsigned i64).
1893define <2 x i64> @fptosi_2f64_to_2i64_const() {
1894; SSE-LABEL: fptosi_2f64_to_2i64_const:
1895; SSE:       # %bb.0:
1896; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1897; SSE-NEXT:    retq
1898;
1899; VEX-LABEL: fptosi_2f64_to_2i64_const:
1900; VEX:       # %bb.0:
1901; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1902; VEX-NEXT:    retq
1903;
1904; AVX512-LABEL: fptosi_2f64_to_2i64_const:
1905; AVX512:       # %bb.0:
1906; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [1,18446744073709551615]
1907; AVX512-NEXT:    retq
1908  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
1909  ret <2 x i64> %cvt
1910}
1911
; fptosi of the constant vector <-1.0, 1.0> to <2 x i32>, widened to <4 x i32>
; by a shufflevector whose upper two mask elements are undef. Every prefix
; expects a single constant materialization of [4294967295,1,0,0]
; (the first lane is -1 printed as an unsigned i32).
1912define <4 x i32> @fptosi_2f64_to_2i32_const() {
1913; SSE-LABEL: fptosi_2f64_to_2i32_const:
1914; SSE:       # %bb.0:
1915; SSE-NEXT:    movsd {{.*#+}} xmm0 = [4294967295,1,0,0]
1916; SSE-NEXT:    retq
1917;
1918; VEX-LABEL: fptosi_2f64_to_2i32_const:
1919; VEX:       # %bb.0:
1920; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = [4294967295,1,0,0]
1921; VEX-NEXT:    retq
1922;
1923; AVX512-LABEL: fptosi_2f64_to_2i32_const:
1924; AVX512:       # %bb.0:
1925; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,1,0,0]
1926; AVX512-NEXT:    retq
1927  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
1928  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1929  ret <4 x i32> %ext
1930}
1931
; fptosi of the constant vector <1.0, -1.0, 2.0, -3.0> to <4 x i64>. The
; conversion is expected to fold to constants: two xmm loads for SSE, and a
; single ymm materialization for VEX and AVX512. Negative lanes appear as
; their unsigned 64-bit encodings in the check lines.
1932define <4 x i64> @fptosi_4f64_to_4i64_const() {
1933; SSE-LABEL: fptosi_4f64_to_4i64_const:
1934; SSE:       # %bb.0:
1935; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1936; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1937; SSE-NEXT:    retq
1938;
1939; VEX-LABEL: fptosi_4f64_to_4i64_const:
1940; VEX:       # %bb.0:
1941; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1942; VEX-NEXT:    retq
1943;
1944; AVX512-LABEL: fptosi_4f64_to_4i64_const:
1945; AVX512:       # %bb.0:
1946; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1947; AVX512-NEXT:    retq
1948  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
1949  ret <4 x i64> %cvt
1950}
1951
; fptosi of the constant vector <-1.0, 1.0, -2.0, 3.0> to <4 x i32>. Every
; prefix expects one constant materialization of [4294967295,1,4294967294,3]
; (negative lanes printed as unsigned i32).
1952define <4 x i32> @fptosi_4f64_to_4i32_const() {
1953; SSE-LABEL: fptosi_4f64_to_4i32_const:
1954; SSE:       # %bb.0:
1955; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1956; SSE-NEXT:    retq
1957;
1958; VEX-LABEL: fptosi_4f64_to_4i32_const:
1959; VEX:       # %bb.0:
1960; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1961; VEX-NEXT:    retq
1962;
1963; AVX512-LABEL: fptosi_4f64_to_4i32_const:
1964; AVX512:       # %bb.0:
1965; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1966; AVX512-NEXT:    retq
1967  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
1968  ret <4 x i32> %cvt
1969}
1970
; fptoui of the constant vector <2.0, 4.0> to <2 x i64>. Every prefix expects
; the conversion to fold to one constant materialization of [2,4].
1971define <2 x i64> @fptoui_2f64_to_2i64_const() {
1972; SSE-LABEL: fptoui_2f64_to_2i64_const:
1973; SSE:       # %bb.0:
1974; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
1975; SSE-NEXT:    retq
1976;
1977; VEX-LABEL: fptoui_2f64_to_2i64_const:
1978; VEX:       # %bb.0:
1979; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
1980; VEX-NEXT:    retq
1981;
1982; AVX512-LABEL: fptoui_2f64_to_2i64_const:
1983; AVX512:       # %bb.0:
1984; AVX512-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [2,4]
1985; AVX512-NEXT:    retq
1986  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
1987  ret <2 x i64> %cvt
1988}
1989
; fptoui of the constant vector <2.0, 4.0> to <2 x i32>, widened to <4 x i32>
; by a shufflevector whose upper two mask elements are undef. Every prefix
; expects a single constant materialization of [2,4,0,0].
; NOTE(review): the %a argument is never referenced by the body; only the
; constant operand is converted.
1990define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
1991; SSE-LABEL: fptoui_2f64_to_2i32_const:
1992; SSE:       # %bb.0:
1993; SSE-NEXT:    movsd {{.*#+}} xmm0 = [2,4,0,0]
1994; SSE-NEXT:    retq
1995;
1996; VEX-LABEL: fptoui_2f64_to_2i32_const:
1997; VEX:       # %bb.0:
1998; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = [2,4,0,0]
1999; VEX-NEXT:    retq
2000;
2001; AVX512-LABEL: fptoui_2f64_to_2i32_const:
2002; AVX512:       # %bb.0:
2003; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,4,0,0]
2004; AVX512-NEXT:    retq
2005  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
2006  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2007  ret <4 x i32> %ext
2008}
2009
; fptoui of the constant vector <2.0, 4.0, 6.0, 8.0> to <4 x i64>. Expected to
; fold to constants: two xmm loads ([2,4] and [6,8]) for SSE, and a single ymm
; materialization of [2,4,6,8] for VEX and AVX512.
; NOTE(review): the %a argument is never referenced by the body.
2010define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
2011; SSE-LABEL: fptoui_4f64_to_4i64_const:
2012; SSE:       # %bb.0:
2013; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
2014; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
2015; SSE-NEXT:    retq
2016;
2017; VEX-LABEL: fptoui_4f64_to_4i64_const:
2018; VEX:       # %bb.0:
2019; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
2020; VEX-NEXT:    retq
2021;
2022; AVX512-LABEL: fptoui_4f64_to_4i64_const:
2023; AVX512:       # %bb.0:
2024; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [2,4,6,8]
2025; AVX512-NEXT:    retq
2026  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
2027  ret <4 x i64> %cvt
2028}
2029
; fptoui of the constant vector <2.0, 4.0, 6.0, 8.0> to <4 x i32>. Every prefix
; expects one constant materialization of [2,4,6,8].
; NOTE(review): the %a argument is never referenced by the body.
2030define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
2031; SSE-LABEL: fptoui_4f64_to_4i32_const:
2032; SSE:       # %bb.0:
2033; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
2034; SSE-NEXT:    retq
2035;
2036; VEX-LABEL: fptoui_4f64_to_4i32_const:
2037; VEX:       # %bb.0:
2038; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2039; VEX-NEXT:    retq
2040;
2041; AVX512-LABEL: fptoui_4f64_to_4i32_const:
2042; AVX512:       # %bb.0:
2043; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,4,6,8]
2044; AVX512-NEXT:    retq
2045  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
2046  ret <4 x i32> %cvt
2047}
2048
; fptosi of the constant vector <1.0, -1.0, 2.0, 3.0> to <4 x i32>. Every
; prefix expects one constant materialization of [1,4294967295,2,3]
; (the -1 lane printed as an unsigned i32).
2049define <4 x i32> @fptosi_4f32_to_4i32_const() {
2050; SSE-LABEL: fptosi_4f32_to_4i32_const:
2051; SSE:       # %bb.0:
2052; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2053; SSE-NEXT:    retq
2054;
2055; VEX-LABEL: fptosi_4f32_to_4i32_const:
2056; VEX:       # %bb.0:
2057; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2058; VEX-NEXT:    retq
2059;
2060; AVX512-LABEL: fptosi_4f32_to_4i32_const:
2061; AVX512:       # %bb.0:
2062; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [1,4294967295,2,3]
2063; AVX512-NEXT:    retq
2064  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
2065  ret <4 x i32> %cvt
2066}
2067
; fptosi of the constant vector <1.0, -1.0, 2.0, 3.0> to <4 x i64>. Expected
; to fold to constants: two xmm loads for SSE, and a single ymm
; materialization for VEX and AVX512. The -1 lane is printed as
; 18446744073709551615.
2068define <4 x i64> @fptosi_4f32_to_4i64_const() {
2069; SSE-LABEL: fptosi_4f32_to_4i64_const:
2070; SSE:       # %bb.0:
2071; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2072; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
2073; SSE-NEXT:    retq
2074;
2075; VEX-LABEL: fptosi_4f32_to_4i64_const:
2076; VEX:       # %bb.0:
2077; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2078; VEX-NEXT:    retq
2079;
2080; AVX512-LABEL: fptosi_4f32_to_4i64_const:
2081; AVX512:       # %bb.0:
2082; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2083; AVX512-NEXT:    retq
2084  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
2085  ret <4 x i64> %cvt
2086}
2087
; fptosi of the constant vector <1.0, -1.0, 2.0, 3.0, 6.0, -8.0, 2.0, -1.0> to
; <8 x i32>. Expected to fold to constants: two xmm loads for SSE, and a single
; ymm materialization for VEX and AVX512. Negative lanes are printed as
; unsigned i32 values.
; NOTE(review): the %a argument is never referenced by the body.
2088define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2089; SSE-LABEL: fptosi_8f32_to_8i32_const:
2090; SSE:       # %bb.0:
2091; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2092; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2093; SSE-NEXT:    retq
2094;
2095; VEX-LABEL: fptosi_8f32_to_8i32_const:
2096; VEX:       # %bb.0:
2097; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2098; VEX-NEXT:    retq
2099;
2100; AVX512-LABEL: fptosi_8f32_to_8i32_const:
2101; AVX512:       # %bb.0:
2102; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2103; AVX512-NEXT:    retq
2104  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
2105  ret <8 x i32> %cvt
2106}
2107
; fptoui of the constant vector <1.0, 2.0, 4.0, 6.0> to <4 x i32>. Every prefix
; expects one constant materialization of [1,2,4,6].
; NOTE(review): the %a argument is never referenced by the body.
2108define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2109; SSE-LABEL: fptoui_4f32_to_4i32_const:
2110; SSE:       # %bb.0:
2111; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
2112; SSE-NEXT:    retq
2113;
2114; VEX-LABEL: fptoui_4f32_to_4i32_const:
2115; VEX:       # %bb.0:
2116; VEX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2117; VEX-NEXT:    retq
2118;
2119; AVX512-LABEL: fptoui_4f32_to_4i32_const:
2120; AVX512:       # %bb.0:
2121; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [1,2,4,6]
2122; AVX512-NEXT:    retq
2123  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
2124  ret <4 x i32> %cvt
2125}
2126
; fptoui of the constant vector <1.0, 2.0, 4.0, 8.0> to <4 x i64>. Expected to
; fold to constants: two xmm loads ([1,2] and [4,8]) for SSE, and a single ymm
; materialization of [1,2,4,8] for VEX and AVX512.
2127define <4 x i64> @fptoui_4f32_to_4i64_const() {
2128; SSE-LABEL: fptoui_4f32_to_4i64_const:
2129; SSE:       # %bb.0:
2130; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
2131; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
2132; SSE-NEXT:    retq
2133;
2134; VEX-LABEL: fptoui_4f32_to_4i64_const:
2135; VEX:       # %bb.0:
2136; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2137; VEX-NEXT:    retq
2138;
2139; AVX512-LABEL: fptoui_4f32_to_4i64_const:
2140; AVX512:       # %bb.0:
2141; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
2142; AVX512-NEXT:    retq
2143  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
2144  ret <4 x i64> %cvt
2145}
2146
; fptoui of the constant vector <1.0, 2.0, 4.0, 6.0, 8.0, 6.0, 4.0, 1.0> to
; <8 x i32>. Expected to fold to constants: two xmm loads for SSE, and a single
; ymm materialization of [1,2,4,6,8,6,4,1] for VEX and AVX512.
; NOTE(review): the %a argument is never referenced by the body.
2147define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2148; SSE-LABEL: fptoui_8f32_to_8i32_const:
2149; SSE:       # %bb.0:
2150; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
2151; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
2152; SSE-NEXT:    retq
2153;
2154; VEX-LABEL: fptoui_8f32_to_8i32_const:
2155; VEX:       # %bb.0:
2156; VEX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2157; VEX-NEXT:    retq
2158;
2159; AVX512-LABEL: fptoui_8f32_to_8i32_const:
2160; AVX512:       # %bb.0:
2161; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2162; AVX512-NEXT:    retq
2163  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
2164  ret <8 x i32> %cvt
2165}
2166
; fptosi of <2 x x86_fp80> to <2 x i32>; the result fills lanes 0-1 of a
; <4 x i32> whose lanes 2-3 are taken from zeroinitializer.
; Both operands are loaded from the stack with fldt. The SSE checks expect each
; fistpl to be bracketed by fnstcw/fldcw with the control word OR'ed with
; 0xC00; the AVX checks expect fisttpl with no control-word manipulation.
; The two i32 results are then reloaded from memory, interleaved, and the
; upper half zeroed with (v)movq.
2167define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
2168; SSE-LABEL: fptosi_2f80_to_4i32:
2169; SSE:       # %bb.0:
2170; SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
2171; SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
2172; SSE-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
2173; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
2174; SSE-NEXT:    orl $3072, %eax # imm = 0xC00
2175; SSE-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
2176; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2177; SSE-NEXT:    fistpl -{{[0-9]+}}(%rsp)
2178; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2179; SSE-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
2180; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
2181; SSE-NEXT:    orl $3072, %eax # imm = 0xC00
2182; SSE-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
2183; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2184; SSE-NEXT:    fistpl -{{[0-9]+}}(%rsp)
2185; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2186; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2187; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2188; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2189; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
2190; SSE-NEXT:    retq
2191;
2192; AVX-LABEL: fptosi_2f80_to_4i32:
2193; AVX:       # %bb.0:
2194; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
2195; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
2196; AVX-NEXT:    fisttpl -{{[0-9]+}}(%rsp)
2197; AVX-NEXT:    fisttpl -{{[0-9]+}}(%rsp)
2198; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2199; AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2200; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2201; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2202; AVX-NEXT:    retq
2203  %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
2204  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2205  ret <4 x i32> %ext
2206}
2207
; fptosi of <2 x fp128> to <2 x i32>; the result fills lanes 0-1 of a
; <4 x i32> whose lanes 2-3 are taken from zeroinitializer.
; Both prefixes expect two calls to __fixtfsi@PLT (one per element): %xmm1 is
; spilled to the stack across the first call and reloaded into %xmm0 for the
; second, while the first call's result is kept in %ebx (saved/restored with
; pushq/popq %rbx). The two results are interleaved and the upper half zeroed
; with (v)movq.
2208define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
2209; SSE-LABEL: fptosi_2f128_to_4i32:
2210; SSE:       # %bb.0:
2211; SSE-NEXT:    pushq %rbx
2212; SSE-NEXT:    subq $16, %rsp
2213; SSE-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
2214; SSE-NEXT:    callq __fixtfsi@PLT
2215; SSE-NEXT:    movl %eax, %ebx
2216; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2217; SSE-NEXT:    callq __fixtfsi@PLT
2218; SSE-NEXT:    movd %eax, %xmm0
2219; SSE-NEXT:    movd %ebx, %xmm1
2220; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2221; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
2222; SSE-NEXT:    addq $16, %rsp
2223; SSE-NEXT:    popq %rbx
2224; SSE-NEXT:    retq
2225;
2226; AVX-LABEL: fptosi_2f128_to_4i32:
2227; AVX:       # %bb.0:
2228; AVX-NEXT:    pushq %rbx
2229; AVX-NEXT:    subq $16, %rsp
2230; AVX-NEXT:    vmovaps %xmm1, (%rsp) # 16-byte Spill
2231; AVX-NEXT:    callq __fixtfsi@PLT
2232; AVX-NEXT:    movl %eax, %ebx
2233; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
2234; AVX-NEXT:    callq __fixtfsi@PLT
2235; AVX-NEXT:    vmovd %eax, %xmm0
2236; AVX-NEXT:    vmovd %ebx, %xmm1
2237; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2238; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2239; AVX-NEXT:    addq $16, %rsp
2240; AVX-NEXT:    popq %rbx
2241; AVX-NEXT:    retq
2242  %cvt = fptosi <2 x fp128> %a to <2 x i32>
2243  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2244  ret <4 x i32> %ext
2245}
2246
; fptosi of <2 x float> directly to the narrow type <2 x i8>.
; All prefixes expect a (v)cvttps2dq conversion to i32 followed by a narrowing
; step: signed packs ((v)packssdw then (v)packsswb) for SSE, VEX, AVX512F and
; AVX512DQ, or a single vpmovdb truncation for AVX512VL and AVX512VLDQ.
2247define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
2248; SSE-LABEL: fptosi_2f32_to_2i8:
2249; SSE:       # %bb.0:
2250; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2251; SSE-NEXT:    packssdw %xmm0, %xmm0
2252; SSE-NEXT:    packsswb %xmm0, %xmm0
2253; SSE-NEXT:    retq
2254;
2255; VEX-LABEL: fptosi_2f32_to_2i8:
2256; VEX:       # %bb.0:
2257; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
2258; VEX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2259; VEX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2260; VEX-NEXT:    retq
2261;
2262; AVX512F-LABEL: fptosi_2f32_to_2i8:
2263; AVX512F:       # %bb.0:
2264; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2265; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2266; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2267; AVX512F-NEXT:    retq
2268;
2269; AVX512VL-LABEL: fptosi_2f32_to_2i8:
2270; AVX512VL:       # %bb.0:
2271; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2272; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2273; AVX512VL-NEXT:    retq
2274;
2275; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
2276; AVX512DQ:       # %bb.0:
2277; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2278; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2279; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2280; AVX512DQ-NEXT:    retq
2281;
2282; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
2283; AVX512VLDQ:       # %bb.0:
2284; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2285; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2286; AVX512VLDQ-NEXT:    retq
2287  %cvt = fptosi <2 x float> %a to <2 x i8>
2288  ret <2 x i8> %cvt
2289}
2290
; fptosi of <2 x float> directly to <2 x i16>.
; Both prefixes expect (v)cvttps2dq followed by one signed pack
; ((v)packssdw); the shared AVX prefix covers every AVX/AVX512 run line.
2291define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2292; SSE-LABEL: fptosi_2f32_to_2i16:
2293; SSE:       # %bb.0:
2294; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2295; SSE-NEXT:    packssdw %xmm0, %xmm0
2296; SSE-NEXT:    retq
2297;
2298; AVX-LABEL: fptosi_2f32_to_2i16:
2299; AVX:       # %bb.0:
2300; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2301; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2302; AVX-NEXT:    retq
2303  %cvt = fptosi <2 x float> %a to <2 x i16>
2304  ret <2 x i16> %cvt
2305}
2306
; fptoui of <2 x float> directly to the narrow type <2 x i8>.
; All prefixes expect the signed (v)cvttps2dq conversion to i32 followed by a
; narrowing step: SSE uses packuswb twice; VEX, AVX512F and AVX512DQ use
; vpackusdw then vpackuswb; AVX512VL and AVX512VLDQ use a single vpmovdb.
2307define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2308; SSE-LABEL: fptoui_2f32_to_2i8:
2309; SSE:       # %bb.0:
2310; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2311; SSE-NEXT:    packuswb %xmm0, %xmm0
2312; SSE-NEXT:    packuswb %xmm0, %xmm0
2313; SSE-NEXT:    retq
2314;
2315; VEX-LABEL: fptoui_2f32_to_2i8:
2316; VEX:       # %bb.0:
2317; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
2318; VEX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2319; VEX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2320; VEX-NEXT:    retq
2321;
2322; AVX512F-LABEL: fptoui_2f32_to_2i8:
2323; AVX512F:       # %bb.0:
2324; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2325; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2326; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2327; AVX512F-NEXT:    retq
2328;
2329; AVX512VL-LABEL: fptoui_2f32_to_2i8:
2330; AVX512VL:       # %bb.0:
2331; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2332; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2333; AVX512VL-NEXT:    retq
2334;
2335; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
2336; AVX512DQ:       # %bb.0:
2337; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2338; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2339; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2340; AVX512DQ-NEXT:    retq
2341;
2342; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
2343; AVX512VLDQ:       # %bb.0:
2344; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2345; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2346; AVX512VLDQ-NEXT:    retq
2347  %cvt = fptoui <2 x float> %a to <2 x i8>
2348  ret <2 x i8> %cvt
2349}
2350
; fptoui of <2 x float> directly to <2 x i16>.
; Both prefixes expect the signed (v)cvttps2dq conversion to i32; SSE then
; gathers the low words with pshuflw, while the shared AVX prefix (all
; AVX/AVX512 run lines) expects vpackusdw.
2351define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2352; SSE-LABEL: fptoui_2f32_to_2i16:
2353; SSE:       # %bb.0:
2354; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2355; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2356; SSE-NEXT:    retq
2357;
2358; AVX-LABEL: fptoui_2f32_to_2i16:
2359; AVX:       # %bb.0:
2360; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2361; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2362; AVX-NEXT:    retq
2363  %cvt = fptoui <2 x float> %a to <2 x i16>
2364  ret <2 x i16> %cvt
2365}
2366
; fptosi of <2 x double> directly to the narrow type <2 x i8>.
; All prefixes expect a (v)cvttpd2dq conversion to i32 followed by a narrowing
; step: signed packs ((v)packssdw then (v)packsswb) for SSE, VEX, AVX512F and
; AVX512DQ, or a single vpmovdb truncation for AVX512VL and AVX512VLDQ.
2367define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2368; SSE-LABEL: fptosi_2f64_to_2i8:
2369; SSE:       # %bb.0:
2370; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2371; SSE-NEXT:    packssdw %xmm0, %xmm0
2372; SSE-NEXT:    packsswb %xmm0, %xmm0
2373; SSE-NEXT:    retq
2374;
2375; VEX-LABEL: fptosi_2f64_to_2i8:
2376; VEX:       # %bb.0:
2377; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2378; VEX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2379; VEX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2380; VEX-NEXT:    retq
2381;
2382; AVX512F-LABEL: fptosi_2f64_to_2i8:
2383; AVX512F:       # %bb.0:
2384; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2385; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2386; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2387; AVX512F-NEXT:    retq
2388;
2389; AVX512VL-LABEL: fptosi_2f64_to_2i8:
2390; AVX512VL:       # %bb.0:
2391; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2392; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2393; AVX512VL-NEXT:    retq
2394;
2395; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
2396; AVX512DQ:       # %bb.0:
2397; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2398; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2399; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2400; AVX512DQ-NEXT:    retq
2401;
2402; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
2403; AVX512VLDQ:       # %bb.0:
2404; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2405; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2406; AVX512VLDQ-NEXT:    retq
2407  %cvt = fptosi <2 x double> %a to <2 x i8>
2408  ret <2 x i8> %cvt
2409}
2410
; fptosi of <2 x double> directly to <2 x i16>.
; Both prefixes expect (v)cvttpd2dq followed by one signed pack
; ((v)packssdw); the shared AVX prefix covers every AVX/AVX512 run line.
2411define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2412; SSE-LABEL: fptosi_2f64_to_2i16:
2413; SSE:       # %bb.0:
2414; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2415; SSE-NEXT:    packssdw %xmm0, %xmm0
2416; SSE-NEXT:    retq
2417;
2418; AVX-LABEL: fptosi_2f64_to_2i16:
2419; AVX:       # %bb.0:
2420; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2421; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2422; AVX-NEXT:    retq
2423  %cvt = fptosi <2 x double> %a to <2 x i16>
2424  ret <2 x i16> %cvt
2425}
2426
; fptoui of <2 x double> directly to the narrow type <2 x i8>.
; All prefixes expect the signed (v)cvttpd2dq conversion to i32 followed by a
; narrowing step: SSE uses packuswb twice; VEX, AVX512F and AVX512DQ use
; vpackusdw then vpackuswb; AVX512VL and AVX512VLDQ use a single vpmovdb.
2427define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2428; SSE-LABEL: fptoui_2f64_to_2i8:
2429; SSE:       # %bb.0:
2430; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2431; SSE-NEXT:    packuswb %xmm0, %xmm0
2432; SSE-NEXT:    packuswb %xmm0, %xmm0
2433; SSE-NEXT:    retq
2434;
2435; VEX-LABEL: fptoui_2f64_to_2i8:
2436; VEX:       # %bb.0:
2437; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2438; VEX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2439; VEX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2440; VEX-NEXT:    retq
2441;
2442; AVX512F-LABEL: fptoui_2f64_to_2i8:
2443; AVX512F:       # %bb.0:
2444; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2445; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2446; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2447; AVX512F-NEXT:    retq
2448;
2449; AVX512VL-LABEL: fptoui_2f64_to_2i8:
2450; AVX512VL:       # %bb.0:
2451; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2452; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2453; AVX512VL-NEXT:    retq
2454;
2455; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
2456; AVX512DQ:       # %bb.0:
2457; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2458; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2459; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2460; AVX512DQ-NEXT:    retq
2461;
2462; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
2463; AVX512VLDQ:       # %bb.0:
2464; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2465; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2466; AVX512VLDQ-NEXT:    retq
2467  %cvt = fptoui <2 x double> %a to <2 x i8>
2468  ret <2 x i8> %cvt
2469}
2470
; fptoui of <2 x double> directly to <2 x i16>.
; Both prefixes expect the signed (v)cvttpd2dq conversion to i32; SSE then
; gathers the low words with pshuflw, while the shared AVX prefix (all
; AVX/AVX512 run lines) expects vpackusdw.
2471define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2472; SSE-LABEL: fptoui_2f64_to_2i16:
2473; SSE:       # %bb.0:
2474; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2475; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2476; SSE-NEXT:    retq
2477;
2478; AVX-LABEL: fptoui_2f64_to_2i16:
2479; AVX:       # %bb.0:
2480; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2481; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2482; AVX-NEXT:    retq
2483  %cvt = fptoui <2 x double> %a to <2 x i16>
2484  ret <2 x i16> %cvt
2485}
2486
; fptosi <8 x double> to <8 x i16>.
; The expected code converts to 32-bit integers first and narrows afterwards.
; The sse2 run does four 128-bit converts, merges them pairwise with unpcklpd
; and finishes with packssdw. The avx and avx2 runs do two 256-bit converts
; and a single vpackssdw. The avx512 runs do one 512-bit convert plus vpmovdw;
; without avx512vl the vpmovdw is the zmm-to-ymm form and the result is then
; read as xmm0 (the "kill" line), with avx512vl it is the ymm-to-xmm form.
2487define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2488; SSE-LABEL: fptosi_8f64_to_8i16:
2489; SSE:       # %bb.0:
2490; SSE-NEXT:    cvttpd2dq %xmm3, %xmm3
2491; SSE-NEXT:    cvttpd2dq %xmm2, %xmm2
2492; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2493; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
2494; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2495; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2496; SSE-NEXT:    packssdw %xmm2, %xmm0
2497; SSE-NEXT:    retq
2498;
2499; VEX-LABEL: fptosi_8f64_to_8i16:
2500; VEX:       # %bb.0:
2501; VEX-NEXT:    vcvttpd2dq %ymm1, %xmm1
2502; VEX-NEXT:    vcvttpd2dq %ymm0, %xmm0
2503; VEX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2504; VEX-NEXT:    vzeroupper
2505; VEX-NEXT:    retq
2506;
2507; AVX512F-LABEL: fptosi_8f64_to_8i16:
2508; AVX512F:       # %bb.0:
2509; AVX512F-NEXT:    vcvttpd2dq %zmm0, %ymm0
2510; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2511; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2512; AVX512F-NEXT:    vzeroupper
2513; AVX512F-NEXT:    retq
2514;
2515; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2516; AVX512VL:       # %bb.0:
2517; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2518; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
2519; AVX512VL-NEXT:    vzeroupper
2520; AVX512VL-NEXT:    retq
2521;
2522; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2523; AVX512DQ:       # %bb.0:
2524; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2525; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
2526; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2527; AVX512DQ-NEXT:    vzeroupper
2528; AVX512DQ-NEXT:    retq
2529;
2530; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2531; AVX512VLDQ:       # %bb.0:
2532; AVX512VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2533; AVX512VLDQ-NEXT:    vpmovdw %ymm0, %xmm0
2534; AVX512VLDQ-NEXT:    vzeroupper
2535; AVX512VLDQ-NEXT:    retq
2536  %cvt = fptosi <8 x double> %a to <8 x i16>
2537  ret <8 x i16> %cvt
2538}
2539
; fptoui <8 x double> to <8 x i16>.
; All runs convert with the signed truncating (v)cvttpd2dq and then narrow.
; The sse2 run sign-extends the low 16 bits of every 32-bit lane in place
; (pslld then psrad by 16), which keeps each lane within the i16 range before
; the final packssdw. The avx and avx2 runs narrow with vpackusdw. The avx512
; runs narrow with vpmovdw, using the zmm-to-ymm form plus a "kill" of the
; upper part when avx512vl is not enabled.
2540define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2541; SSE-LABEL: fptoui_8f64_to_8i16:
2542; SSE:       # %bb.0:
2543; SSE-NEXT:    cvttpd2dq %xmm3, %xmm3
2544; SSE-NEXT:    cvttpd2dq %xmm2, %xmm2
2545; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2546; SSE-NEXT:    pslld $16, %xmm2
2547; SSE-NEXT:    psrad $16, %xmm2
2548; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
2549; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2550; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2551; SSE-NEXT:    pslld $16, %xmm0
2552; SSE-NEXT:    psrad $16, %xmm0
2553; SSE-NEXT:    packssdw %xmm2, %xmm0
2554; SSE-NEXT:    retq
2555;
2556; VEX-LABEL: fptoui_8f64_to_8i16:
2557; VEX:       # %bb.0:
2558; VEX-NEXT:    vcvttpd2dq %ymm1, %xmm1
2559; VEX-NEXT:    vcvttpd2dq %ymm0, %xmm0
2560; VEX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
2561; VEX-NEXT:    vzeroupper
2562; VEX-NEXT:    retq
2563;
2564; AVX512F-LABEL: fptoui_8f64_to_8i16:
2565; AVX512F:       # %bb.0:
2566; AVX512F-NEXT:    vcvttpd2dq %zmm0, %ymm0
2567; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2568; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2569; AVX512F-NEXT:    vzeroupper
2570; AVX512F-NEXT:    retq
2571;
2572; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2573; AVX512VL:       # %bb.0:
2574; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2575; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
2576; AVX512VL-NEXT:    vzeroupper
2577; AVX512VL-NEXT:    retq
2578;
2579; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2580; AVX512DQ:       # %bb.0:
2581; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2582; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
2583; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2584; AVX512DQ-NEXT:    vzeroupper
2585; AVX512DQ-NEXT:    retq
2586;
2587; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2588; AVX512VLDQ:       # %bb.0:
2589; AVX512VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2590; AVX512VLDQ-NEXT:    vpmovdw %ymm0, %xmm0
2591; AVX512VLDQ-NEXT:    vzeroupper
2592; AVX512VLDQ-NEXT:    retq
2593  %cvt = fptoui <8 x double> %a to <8 x i16>
2594  ret <8 x i16> %cvt
2595}
2596
; fptosi <16 x float> to <16 x i8>.
; The sse2, avx and avx2 runs convert each input register with (v)cvttps2dq
; and narrow in two signed pack steps (packssdw, then packsswb). The avx and
; avx2 expectations differ only in the 128-bit extract used (vextractf128
; versus vextracti128). Every avx512 run shares one expectation: a single
; 512-bit convert followed by vpmovdb.
2597define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2598; SSE-LABEL: fptosi_16f32_to_16i8:
2599; SSE:       # %bb.0:
2600; SSE-NEXT:    cvttps2dq %xmm3, %xmm3
2601; SSE-NEXT:    cvttps2dq %xmm2, %xmm2
2602; SSE-NEXT:    packssdw %xmm3, %xmm2
2603; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
2604; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2605; SSE-NEXT:    packssdw %xmm1, %xmm0
2606; SSE-NEXT:    packsswb %xmm2, %xmm0
2607; SSE-NEXT:    retq
2608;
2609; AVX1-LABEL: fptosi_16f32_to_16i8:
2610; AVX1:       # %bb.0:
2611; AVX1-NEXT:    vcvttps2dq %ymm1, %ymm1
2612; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2613; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2614; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
2615; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2616; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2617; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2618; AVX1-NEXT:    vzeroupper
2619; AVX1-NEXT:    retq
2620;
2621; AVX2-LABEL: fptosi_16f32_to_16i8:
2622; AVX2:       # %bb.0:
2623; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
2624; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
2625; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2626; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
2627; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
2628; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2629; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2630; AVX2-NEXT:    vzeroupper
2631; AVX2-NEXT:    retq
2632;
2633; AVX512-LABEL: fptosi_16f32_to_16i8:
2634; AVX512:       # %bb.0:
2635; AVX512-NEXT:    vcvttps2dq %zmm0, %zmm0
2636; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
2637; AVX512-NEXT:    vzeroupper
2638; AVX512-NEXT:    retq
2639  %cvt = fptosi <16 x float> %a to <16 x i8>
2640  ret <16 x i8> %cvt
2641}
2642
; fptoui <16 x float> to <16 x i8>.
; The sse2, avx and avx2 runs convert each input register with the signed
; (v)cvttps2dq, narrow to 16 bits with the signed packssdw, and only the final
; step to 8 bits uses the unsigned-saturating packuswb. The avx and avx2
; expectations differ only in the 128-bit extract used (vextractf128 versus
; vextracti128). Every avx512 run shares one expectation: a single 512-bit
; convert followed by vpmovdb.
2643define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2644; SSE-LABEL: fptoui_16f32_to_16i8:
2645; SSE:       # %bb.0:
2646; SSE-NEXT:    cvttps2dq %xmm3, %xmm3
2647; SSE-NEXT:    cvttps2dq %xmm2, %xmm2
2648; SSE-NEXT:    packssdw %xmm3, %xmm2
2649; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
2650; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2651; SSE-NEXT:    packssdw %xmm1, %xmm0
2652; SSE-NEXT:    packuswb %xmm2, %xmm0
2653; SSE-NEXT:    retq
2654;
2655; AVX1-LABEL: fptoui_16f32_to_16i8:
2656; AVX1:       # %bb.0:
2657; AVX1-NEXT:    vcvttps2dq %ymm1, %ymm1
2658; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2659; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2660; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
2661; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2662; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2663; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2664; AVX1-NEXT:    vzeroupper
2665; AVX1-NEXT:    retq
2666;
2667; AVX2-LABEL: fptoui_16f32_to_16i8:
2668; AVX2:       # %bb.0:
2669; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
2670; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
2671; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2672; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
2673; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
2674; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2675; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2676; AVX2-NEXT:    vzeroupper
2677; AVX2-NEXT:    retq
2678;
2679; AVX512-LABEL: fptoui_16f32_to_16i8:
2680; AVX512:       # %bb.0:
2681; AVX512-NEXT:    vcvttps2dq %zmm0, %zmm0
2682; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
2683; AVX512-NEXT:    vzeroupper
2684; AVX512-NEXT:    retq
2685  %cvt = fptoui <16 x float> %a to <16 x i8>
2686  ret <16 x i8> %cvt
2687}
2688
; fptosi of a <2 x float> loaded from %x, producing <2 x i64>.
; Without avx512dq, the two lanes are converted one at a time through a
; general-purpose register with (v)cvttss2si and recombined with
; (v)punpcklqdq. With avx512dq a packed vcvttps2qq is expected. Only the run
; that also enables avx512vl folds the load into the convert's memory operand;
; the avx512dq run without vl loads with vmovsd and uses the widened
; ymm-to-zmm form.
2689define <2 x i64> @fptosi_2f32_to_2i64_load(ptr %x) {
2690; SSE-LABEL: fptosi_2f32_to_2i64_load:
2691; SSE:       # %bb.0:
2692; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2693; SSE-NEXT:    cvttss2si %xmm1, %rax
2694; SSE-NEXT:    movq %rax, %xmm0
2695; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2696; SSE-NEXT:    cvttss2si %xmm1, %rax
2697; SSE-NEXT:    movq %rax, %xmm1
2698; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2699; SSE-NEXT:    retq
2700;
2701; VEX-LABEL: fptosi_2f32_to_2i64_load:
2702; VEX:       # %bb.0:
2703; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2704; VEX-NEXT:    vcvttss2si %xmm0, %rax
2705; VEX-NEXT:    vmovq %rax, %xmm1
2706; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2707; VEX-NEXT:    vcvttss2si %xmm0, %rax
2708; VEX-NEXT:    vmovq %rax, %xmm0
2709; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2710; VEX-NEXT:    retq
2711;
2712; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2713; AVX512F:       # %bb.0:
2714; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2715; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
2716; AVX512F-NEXT:    vmovq %rax, %xmm1
2717; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2718; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
2719; AVX512F-NEXT:    vmovq %rax, %xmm0
2720; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2721; AVX512F-NEXT:    retq
2722;
2723; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2724; AVX512VL:       # %bb.0:
2725; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2726; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
2727; AVX512VL-NEXT:    vmovq %rax, %xmm1
2728; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2729; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
2730; AVX512VL-NEXT:    vmovq %rax, %xmm0
2731; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2732; AVX512VL-NEXT:    retq
2733;
2734; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2735; AVX512DQ:       # %bb.0:
2736; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2737; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
2738; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2739; AVX512DQ-NEXT:    vzeroupper
2740; AVX512DQ-NEXT:    retq
2741;
2742; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2743; AVX512VLDQ:       # %bb.0:
2744; AVX512VLDQ-NEXT:    vcvttps2qq (%rdi), %xmm0
2745; AVX512VLDQ-NEXT:    retq
2746  %a = load <2 x float>, ptr %x
2747  %b = fptosi <2 x float> %a to <2 x i64>
2748  ret <2 x i64> %b
2749}
2750
; fptoui of a <2 x float> loaded from %x, producing <2 x i64>.
; The sse2, avx and avx2 expectations use only the signed (v)cvttss2si. Each
; lane is converted twice, once as loaded and once after subtracting
; 9.22337203E+18 (2^63 as a float), and the two results are merged with a
; sarq/andq/orq sequence. The avx512f runs without avx512dq use the unsigned
; scalar vcvttss2usi per lane. The avx512dq runs use the packed vcvttps2uqq,
; and only the run that also enables avx512vl folds the load into the
; convert's memory operand.
2751define <2 x i64> @fptoui_2f32_to_2i64_load(ptr %x) {
2752; SSE-LABEL: fptoui_2f32_to_2i64_load:
2753; SSE:       # %bb.0:
2754; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2755; SSE-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2756; SSE-NEXT:    movaps %xmm1, %xmm0
2757; SSE-NEXT:    subss %xmm2, %xmm0
2758; SSE-NEXT:    cvttss2si %xmm0, %rax
2759; SSE-NEXT:    cvttss2si %xmm1, %rcx
2760; SSE-NEXT:    movq %rcx, %rdx
2761; SSE-NEXT:    sarq $63, %rdx
2762; SSE-NEXT:    andq %rax, %rdx
2763; SSE-NEXT:    orq %rcx, %rdx
2764; SSE-NEXT:    movq %rdx, %xmm0
2765; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2766; SSE-NEXT:    cvttss2si %xmm1, %rax
2767; SSE-NEXT:    subss %xmm2, %xmm1
2768; SSE-NEXT:    cvttss2si %xmm1, %rcx
2769; SSE-NEXT:    movq %rax, %rdx
2770; SSE-NEXT:    sarq $63, %rdx
2771; SSE-NEXT:    andq %rcx, %rdx
2772; SSE-NEXT:    orq %rax, %rdx
2773; SSE-NEXT:    movq %rdx, %xmm1
2774; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2775; SSE-NEXT:    retq
2776;
2777; VEX-LABEL: fptoui_2f32_to_2i64_load:
2778; VEX:       # %bb.0:
2779; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2780; VEX-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2781; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
2782; VEX-NEXT:    vcvttss2si %xmm2, %rax
2783; VEX-NEXT:    vcvttss2si %xmm0, %rcx
2784; VEX-NEXT:    movq %rcx, %rdx
2785; VEX-NEXT:    sarq $63, %rdx
2786; VEX-NEXT:    andq %rax, %rdx
2787; VEX-NEXT:    orq %rcx, %rdx
2788; VEX-NEXT:    vmovq %rdx, %xmm2
2789; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2790; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
2791; VEX-NEXT:    vcvttss2si %xmm1, %rax
2792; VEX-NEXT:    vcvttss2si %xmm0, %rcx
2793; VEX-NEXT:    movq %rcx, %rdx
2794; VEX-NEXT:    sarq $63, %rdx
2795; VEX-NEXT:    andq %rax, %rdx
2796; VEX-NEXT:    orq %rcx, %rdx
2797; VEX-NEXT:    vmovq %rdx, %xmm0
2798; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2799; VEX-NEXT:    retq
2800;
2801; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2802; AVX512F:       # %bb.0:
2803; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2804; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
2805; AVX512F-NEXT:    vmovq %rax, %xmm1
2806; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2807; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
2808; AVX512F-NEXT:    vmovq %rax, %xmm0
2809; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2810; AVX512F-NEXT:    retq
2811;
2812; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2813; AVX512VL:       # %bb.0:
2814; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2815; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
2816; AVX512VL-NEXT:    vmovq %rax, %xmm1
2817; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2818; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
2819; AVX512VL-NEXT:    vmovq %rax, %xmm0
2820; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2821; AVX512VL-NEXT:    retq
2822;
2823; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2824; AVX512DQ:       # %bb.0:
2825; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2826; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
2827; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2828; AVX512DQ-NEXT:    vzeroupper
2829; AVX512DQ-NEXT:    retq
2830;
2831; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2832; AVX512VLDQ:       # %bb.0:
2833; AVX512VLDQ-NEXT:    vcvttps2uqq (%rdi), %xmm0
2834; AVX512VLDQ-NEXT:    retq
2835  %a = load <2 x float>, ptr %x
2836  %b = fptoui <2 x float> %a to <2 x i64>
2837  ret <2 x i64> %b
2838}
2839