xref: /llvm-project/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-32
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-64
4; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-32
5; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-64
6; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-32
7; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-64
8; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-32
9; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-64
10; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-32
11; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-64
12; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-32
13; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-64
14
; Declarations of the constrained (strict floating-point) fptosi/fptoui
; intrinsics: <2 x double> and <2 x float> sources converted to 2-element
; i64/i32/i16/i8/i1 vectors, and <4 x float> sources converted to 4-element
; i32/i16/i8/i1 vectors. Each takes the source vector plus a metadata operand.
15declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
16declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
17declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
18declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
19declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
20declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
21declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
22declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
23declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double>, metadata)
24declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double>, metadata)
25declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float>, metadata)
26declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float>, metadata)
27declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double>, metadata)
28declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double>, metadata)
29declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float>, metadata)
30declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float>, metadata)
31declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double>, metadata)
32declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double>, metadata)
33declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float>, metadata)
34declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float>, metadata)
35declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
36declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
37declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f32(<4 x float>, metadata)
38declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f32(<4 x float>, metadata)
39declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
40declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
41declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float>, metadata)
42declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float>, metadata)
43
; Strict-FP test case: converts the <2 x double> argument to <2 x i64> with the
; constrained fptosi intrinsic under "fpexcept.strict" and returns the result.
; The assertions inside the body are autogenerated (the file header names
; utils/update_llc_test_checks.py) and pin the expected assembly per llc run.
; The 32-bit expectations go through the x87 unit (fldl + fistpll/fisttpll),
; the 64-bit SSE/AVX/AVX512F/AVX512VL ones convert each element with a scalar
; cvttsd2si, and the two DQ-based prefixes expect a single packed vcvttpd2qq.
; Those DQ prefixes are shared by the i686 and x86_64 runs, hence the
; ret{{[l|q]}} pattern that accepts either return mnemonic.
44define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
45; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
46; SSE-32:       # %bb.0:
47; SSE-32-NEXT:    pushl %ebp
48; SSE-32-NEXT:    .cfi_def_cfa_offset 8
49; SSE-32-NEXT:    .cfi_offset %ebp, -8
50; SSE-32-NEXT:    movl %esp, %ebp
51; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
52; SSE-32-NEXT:    andl $-8, %esp
53; SSE-32-NEXT:    subl $24, %esp
54; SSE-32-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
55; SSE-32-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
56; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
57; SSE-32-NEXT:    wait
58; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
59; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
60; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
61; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
62; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
63; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
64; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
65; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
66; SSE-32-NEXT:    wait
67; SSE-32-NEXT:    fnstcw (%esp)
68; SSE-32-NEXT:    movzwl (%esp), %eax
69; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
70; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
71; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
72; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
73; SSE-32-NEXT:    fldcw (%esp)
74; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
75; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
76; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
77; SSE-32-NEXT:    movl %ebp, %esp
78; SSE-32-NEXT:    popl %ebp
79; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
80; SSE-32-NEXT:    retl
81;
82; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
83; SSE-64:       # %bb.0:
84; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
85; SSE-64-NEXT:    movq %rax, %xmm1
86; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
87; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
88; SSE-64-NEXT:    movq %rax, %xmm0
89; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
90; SSE-64-NEXT:    movdqa %xmm1, %xmm0
91; SSE-64-NEXT:    retq
92;
93; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
94; AVX-32:       # %bb.0:
95; AVX-32-NEXT:    pushl %ebp
96; AVX-32-NEXT:    .cfi_def_cfa_offset 8
97; AVX-32-NEXT:    .cfi_offset %ebp, -8
98; AVX-32-NEXT:    movl %esp, %ebp
99; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
100; AVX-32-NEXT:    andl $-8, %esp
101; AVX-32-NEXT:    subl $16, %esp
102; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
103; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
104; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
105; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
106; AVX-32-NEXT:    fldl (%esp)
107; AVX-32-NEXT:    fisttpll (%esp)
108; AVX-32-NEXT:    wait
109; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
110; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
111; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
112; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
113; AVX-32-NEXT:    movl %ebp, %esp
114; AVX-32-NEXT:    popl %ebp
115; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
116; AVX-32-NEXT:    retl
117;
118; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
119; AVX-64:       # %bb.0:
120; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
121; AVX-64-NEXT:    vmovq %rax, %xmm1
122; AVX-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
123; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
124; AVX-64-NEXT:    vmovq %rax, %xmm0
125; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
126; AVX-64-NEXT:    retq
127;
128; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
129; AVX512F-32:       # %bb.0:
130; AVX512F-32-NEXT:    pushl %ebp
131; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
132; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
133; AVX512F-32-NEXT:    movl %esp, %ebp
134; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
135; AVX512F-32-NEXT:    andl $-8, %esp
136; AVX512F-32-NEXT:    subl $16, %esp
137; AVX512F-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
138; AVX512F-32-NEXT:    vmovhps %xmm0, (%esp)
139; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
140; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
141; AVX512F-32-NEXT:    fldl (%esp)
142; AVX512F-32-NEXT:    fisttpll (%esp)
143; AVX512F-32-NEXT:    wait
144; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
145; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
146; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
147; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
148; AVX512F-32-NEXT:    movl %ebp, %esp
149; AVX512F-32-NEXT:    popl %ebp
150; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
151; AVX512F-32-NEXT:    retl
152;
153; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
154; AVX512F-64:       # %bb.0:
155; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
156; AVX512F-64-NEXT:    vmovq %rax, %xmm1
157; AVX512F-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
158; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
159; AVX512F-64-NEXT:    vmovq %rax, %xmm0
160; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
161; AVX512F-64-NEXT:    retq
162;
163; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
164; AVX512VL-32:       # %bb.0:
165; AVX512VL-32-NEXT:    pushl %ebp
166; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
167; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
168; AVX512VL-32-NEXT:    movl %esp, %ebp
169; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
170; AVX512VL-32-NEXT:    andl $-8, %esp
171; AVX512VL-32-NEXT:    subl $16, %esp
172; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
173; AVX512VL-32-NEXT:    vmovhps %xmm0, (%esp)
174; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
175; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
176; AVX512VL-32-NEXT:    fldl (%esp)
177; AVX512VL-32-NEXT:    fisttpll (%esp)
178; AVX512VL-32-NEXT:    wait
179; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
180; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
181; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
182; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
183; AVX512VL-32-NEXT:    movl %ebp, %esp
184; AVX512VL-32-NEXT:    popl %ebp
185; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
186; AVX512VL-32-NEXT:    retl
187;
188; AVX512VL-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
189; AVX512VL-64:       # %bb.0:
190; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
191; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
192; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
193; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
194; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
195; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
196; AVX512VL-64-NEXT:    retq
197;
198; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
199; AVX512DQ:       # %bb.0:
200; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
201; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
202; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
203; AVX512DQ-NEXT:    vzeroupper
204; AVX512DQ-NEXT:    ret{{[l|q]}}
205;
206; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
207; AVX512VLDQ:       # %bb.0:
208; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
209; AVX512VLDQ-NEXT:    ret{{[l|q]}}
210  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
211                                              metadata !"fpexcept.strict") #0
212  ret <2 x i64> %ret
213}
214
; Strict-FP test case: converts the <2 x double> argument to <2 x i64> with the
; constrained fptoui intrinsic under "fpexcept.strict" and returns the result.
; The assertions inside the body are autogenerated (the file header names
; utils/update_llc_test_checks.py) and pin the expected assembly per llc run.
; For the SSE and AVX prefixes, and the 32-bit AVX512F/AVX512VL ones, the
; checked sequence compares each element against 9.2233720368547758E+18,
; subtracts either that constant or zero depending on the comparison, performs
; a signed conversion, and xors the comparison bit (shifted to the top bit)
; into the result. The 64-bit AVX512F/AVX512VL expectations use the scalar
; vcvttsd2usi instead, and the two DQ-based prefixes expect a single packed
; vcvttpd2uqq. The DQ prefixes are shared by the i686 and x86_64 runs, hence
; the ret{{[l|q]}} pattern that accepts either return mnemonic.
215define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
216; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
217; SSE-32:       # %bb.0:
218; SSE-32-NEXT:    pushl %ebp
219; SSE-32-NEXT:    .cfi_def_cfa_offset 8
220; SSE-32-NEXT:    .cfi_offset %ebp, -8
221; SSE-32-NEXT:    movl %esp, %ebp
222; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
223; SSE-32-NEXT:    andl $-8, %esp
224; SSE-32-NEXT:    subl $24, %esp
225; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
226; SSE-32-NEXT:    comisd %xmm1, %xmm0
227; SSE-32-NEXT:    movapd %xmm1, %xmm2
228; SSE-32-NEXT:    jae .LBB1_2
229; SSE-32-NEXT:  # %bb.1:
230; SSE-32-NEXT:    xorpd %xmm2, %xmm2
231; SSE-32-NEXT:  .LBB1_2:
232; SSE-32-NEXT:    movapd %xmm0, %xmm3
233; SSE-32-NEXT:    subsd %xmm2, %xmm3
234; SSE-32-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
235; SSE-32-NEXT:    setae %al
236; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
237; SSE-32-NEXT:    wait
238; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
239; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
240; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
241; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
242; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
243; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
244; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
245; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
246; SSE-32-NEXT:    comisd %xmm1, %xmm0
247; SSE-32-NEXT:    jae .LBB1_4
248; SSE-32-NEXT:  # %bb.3:
249; SSE-32-NEXT:    xorpd %xmm1, %xmm1
250; SSE-32-NEXT:  .LBB1_4:
251; SSE-32-NEXT:    subsd %xmm1, %xmm0
252; SSE-32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
253; SSE-32-NEXT:    setae %cl
254; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
255; SSE-32-NEXT:    wait
256; SSE-32-NEXT:    fnstcw (%esp)
257; SSE-32-NEXT:    movzwl (%esp), %edx
258; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
259; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
260; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
261; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
262; SSE-32-NEXT:    fldcw (%esp)
263; SSE-32-NEXT:    movzbl %al, %eax
264; SSE-32-NEXT:    shll $31, %eax
265; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
266; SSE-32-NEXT:    movd %eax, %xmm1
267; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
268; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
269; SSE-32-NEXT:    movzbl %cl, %eax
270; SSE-32-NEXT:    shll $31, %eax
271; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
272; SSE-32-NEXT:    movd %eax, %xmm1
273; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
274; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
275; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
276; SSE-32-NEXT:    movl %ebp, %esp
277; SSE-32-NEXT:    popl %ebp
278; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
279; SSE-32-NEXT:    retl
280;
281; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
282; SSE-64:       # %bb.0:
283; SSE-64-NEXT:    movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
284; SSE-64-NEXT:    comisd %xmm3, %xmm0
285; SSE-64-NEXT:    xorpd %xmm2, %xmm2
286; SSE-64-NEXT:    xorpd %xmm1, %xmm1
287; SSE-64-NEXT:    jb .LBB1_2
288; SSE-64-NEXT:  # %bb.1:
289; SSE-64-NEXT:    movapd %xmm3, %xmm1
290; SSE-64-NEXT:  .LBB1_2:
291; SSE-64-NEXT:    movapd %xmm0, %xmm4
292; SSE-64-NEXT:    subsd %xmm1, %xmm4
293; SSE-64-NEXT:    cvttsd2si %xmm4, %rax
294; SSE-64-NEXT:    setae %cl
295; SSE-64-NEXT:    movzbl %cl, %ecx
296; SSE-64-NEXT:    shlq $63, %rcx
297; SSE-64-NEXT:    xorq %rax, %rcx
298; SSE-64-NEXT:    movq %rcx, %xmm1
299; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
300; SSE-64-NEXT:    comisd %xmm3, %xmm0
301; SSE-64-NEXT:    jb .LBB1_4
302; SSE-64-NEXT:  # %bb.3:
303; SSE-64-NEXT:    movapd %xmm3, %xmm2
304; SSE-64-NEXT:  .LBB1_4:
305; SSE-64-NEXT:    subsd %xmm2, %xmm0
306; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
307; SSE-64-NEXT:    setae %cl
308; SSE-64-NEXT:    movzbl %cl, %ecx
309; SSE-64-NEXT:    shlq $63, %rcx
310; SSE-64-NEXT:    xorq %rax, %rcx
311; SSE-64-NEXT:    movq %rcx, %xmm0
312; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
313; SSE-64-NEXT:    movdqa %xmm1, %xmm0
314; SSE-64-NEXT:    retq
315;
316; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
317; AVX-32:       # %bb.0:
318; AVX-32-NEXT:    pushl %ebp
319; AVX-32-NEXT:    .cfi_def_cfa_offset 8
320; AVX-32-NEXT:    .cfi_offset %ebp, -8
321; AVX-32-NEXT:    movl %esp, %ebp
322; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
323; AVX-32-NEXT:    andl $-8, %esp
324; AVX-32-NEXT:    subl $16, %esp
325; AVX-32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
326; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
327; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
328; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
329; AVX-32-NEXT:    jae .LBB1_2
330; AVX-32-NEXT:  # %bb.1:
331; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
332; AVX-32-NEXT:  .LBB1_2:
333; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
334; AVX-32-NEXT:    vmovsd %xmm2, (%esp)
335; AVX-32-NEXT:    fldl (%esp)
336; AVX-32-NEXT:    fisttpll (%esp)
337; AVX-32-NEXT:    wait
338; AVX-32-NEXT:    setae %al
339; AVX-32-NEXT:    movzbl %al, %eax
340; AVX-32-NEXT:    shll $31, %eax
341; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
342; AVX-32-NEXT:    vcomisd %xmm1, %xmm0
343; AVX-32-NEXT:    jae .LBB1_4
344; AVX-32-NEXT:  # %bb.3:
345; AVX-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
346; AVX-32-NEXT:  .LBB1_4:
347; AVX-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
348; AVX-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
349; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
350; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
351; AVX-32-NEXT:    wait
352; AVX-32-NEXT:    setae %cl
353; AVX-32-NEXT:    movzbl %cl, %ecx
354; AVX-32-NEXT:    shll $31, %ecx
355; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
356; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
357; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
358; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
359; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
360; AVX-32-NEXT:    movl %ebp, %esp
361; AVX-32-NEXT:    popl %ebp
362; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
363; AVX-32-NEXT:    retl
364;
365; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
366; AVX-64:       # %bb.0:
367; AVX-64-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
368; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
369; AVX-64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
370; AVX-64-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
371; AVX-64-NEXT:    jb .LBB1_2
372; AVX-64-NEXT:  # %bb.1:
373; AVX-64-NEXT:    vmovapd %xmm1, %xmm3
374; AVX-64-NEXT:  .LBB1_2:
375; AVX-64-NEXT:    vsubsd %xmm3, %xmm0, %xmm3
376; AVX-64-NEXT:    vcvttsd2si %xmm3, %rax
377; AVX-64-NEXT:    setae %cl
378; AVX-64-NEXT:    movzbl %cl, %ecx
379; AVX-64-NEXT:    shlq $63, %rcx
380; AVX-64-NEXT:    xorq %rax, %rcx
381; AVX-64-NEXT:    vmovq %rcx, %xmm3
382; AVX-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
383; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
384; AVX-64-NEXT:    jb .LBB1_4
385; AVX-64-NEXT:  # %bb.3:
386; AVX-64-NEXT:    vmovapd %xmm1, %xmm2
387; AVX-64-NEXT:  .LBB1_4:
388; AVX-64-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
389; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
390; AVX-64-NEXT:    setae %cl
391; AVX-64-NEXT:    movzbl %cl, %ecx
392; AVX-64-NEXT:    shlq $63, %rcx
393; AVX-64-NEXT:    xorq %rax, %rcx
394; AVX-64-NEXT:    vmovq %rcx, %xmm0
395; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
396; AVX-64-NEXT:    retq
397;
398; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
399; AVX512F-32:       # %bb.0:
400; AVX512F-32-NEXT:    pushl %ebp
401; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
402; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
403; AVX512F-32-NEXT:    movl %esp, %ebp
404; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
405; AVX512F-32-NEXT:    andl $-8, %esp
406; AVX512F-32-NEXT:    subl $16, %esp
407; AVX512F-32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
408; AVX512F-32-NEXT:    vmovsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
409; AVX512F-32-NEXT:    xorl %eax, %eax
410; AVX512F-32-NEXT:    vcomisd %xmm2, %xmm1
411; AVX512F-32-NEXT:    setae %al
412; AVX512F-32-NEXT:    kmovw %eax, %k1
413; AVX512F-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
414; AVX512F-32-NEXT:    vsubsd %xmm3, %xmm1, %xmm1
415; AVX512F-32-NEXT:    vmovsd %xmm1, (%esp)
416; AVX512F-32-NEXT:    xorl %ecx, %ecx
417; AVX512F-32-NEXT:    vcomisd %xmm2, %xmm0
418; AVX512F-32-NEXT:    setae %cl
419; AVX512F-32-NEXT:    kmovw %ecx, %k1
420; AVX512F-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
421; AVX512F-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
422; AVX512F-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
423; AVX512F-32-NEXT:    fldl (%esp)
424; AVX512F-32-NEXT:    fisttpll (%esp)
425; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
426; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
427; AVX512F-32-NEXT:    wait
428; AVX512F-32-NEXT:    shll $31, %eax
429; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
430; AVX512F-32-NEXT:    shll $31, %ecx
431; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
432; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
433; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
434; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
435; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
436; AVX512F-32-NEXT:    movl %ebp, %esp
437; AVX512F-32-NEXT:    popl %ebp
438; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
439; AVX512F-32-NEXT:    retl
440;
441; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
442; AVX512F-64:       # %bb.0:
443; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
444; AVX512F-64-NEXT:    vmovq %rax, %xmm1
445; AVX512F-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
446; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
447; AVX512F-64-NEXT:    vmovq %rax, %xmm0
448; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
449; AVX512F-64-NEXT:    retq
450;
451; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
452; AVX512VL-32:       # %bb.0:
453; AVX512VL-32-NEXT:    pushl %ebp
454; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
455; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
456; AVX512VL-32-NEXT:    movl %esp, %ebp
457; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
458; AVX512VL-32-NEXT:    andl $-8, %esp
459; AVX512VL-32-NEXT:    subl $16, %esp
460; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
461; AVX512VL-32-NEXT:    vmovsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
462; AVX512VL-32-NEXT:    xorl %eax, %eax
463; AVX512VL-32-NEXT:    vcomisd %xmm2, %xmm1
464; AVX512VL-32-NEXT:    setae %al
465; AVX512VL-32-NEXT:    kmovw %eax, %k1
466; AVX512VL-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
467; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm1, %xmm1
468; AVX512VL-32-NEXT:    vmovsd %xmm1, (%esp)
469; AVX512VL-32-NEXT:    xorl %ecx, %ecx
470; AVX512VL-32-NEXT:    vcomisd %xmm2, %xmm0
471; AVX512VL-32-NEXT:    setae %cl
472; AVX512VL-32-NEXT:    kmovw %ecx, %k1
473; AVX512VL-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
474; AVX512VL-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
475; AVX512VL-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
476; AVX512VL-32-NEXT:    fldl (%esp)
477; AVX512VL-32-NEXT:    fisttpll (%esp)
478; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
479; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
480; AVX512VL-32-NEXT:    wait
481; AVX512VL-32-NEXT:    shll $31, %eax
482; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
483; AVX512VL-32-NEXT:    shll $31, %ecx
484; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
485; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
486; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
487; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
488; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
489; AVX512VL-32-NEXT:    movl %ebp, %esp
490; AVX512VL-32-NEXT:    popl %ebp
491; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
492; AVX512VL-32-NEXT:    retl
493;
494; AVX512VL-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
495; AVX512VL-64:       # %bb.0:
496; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
497; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
498; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
499; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
500; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
501; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
502; AVX512VL-64-NEXT:    retq
503;
504; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
505; AVX512DQ:       # %bb.0:
506; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
507; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
508; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
509; AVX512DQ-NEXT:    vzeroupper
510; AVX512DQ-NEXT:    ret{{[l|q]}}
511;
512; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
513; AVX512VLDQ:       # %bb.0:
514; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
515; AVX512VLDQ-NEXT:    ret{{[l|q]}}
516  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a,
517                                              metadata !"fpexcept.strict") #0
518  ret <2 x i64> %ret
519}
520
; Strict-FP test case: converts the <2 x float> argument to <2 x i64> with the
; constrained fptosi intrinsic under "fpexcept.strict" and returns the result.
; The assertions inside the body are autogenerated (the file header names
; utils/update_llc_test_checks.py) and pin the expected assembly per llc run.
; The 32-bit expectations go through the x87 unit (flds + fistpll/fisttpll),
; the 64-bit SSE/AVX/AVX512F/AVX512VL ones convert each element with a scalar
; cvttss2si, and the two DQ-based prefixes expect a single packed vcvttps2qq.
; Those DQ prefixes are shared by the i686 and x86_64 runs, hence the
; ret{{[l|q]}} pattern that accepts either return mnemonic.
521define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
522; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
523; SSE-32:       # %bb.0:
524; SSE-32-NEXT:    pushl %ebp
525; SSE-32-NEXT:    .cfi_def_cfa_offset 8
526; SSE-32-NEXT:    .cfi_offset %ebp, -8
527; SSE-32-NEXT:    movl %esp, %ebp
528; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
529; SSE-32-NEXT:    andl $-8, %esp
530; SSE-32-NEXT:    subl $24, %esp
531; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
532; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
533; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
534; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
535; SSE-32-NEXT:    wait
536; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
537; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
538; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
539; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
540; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
541; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
542; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
543; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
544; SSE-32-NEXT:    wait
545; SSE-32-NEXT:    fnstcw (%esp)
546; SSE-32-NEXT:    movzwl (%esp), %eax
547; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
548; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
549; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
550; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
551; SSE-32-NEXT:    fldcw (%esp)
552; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
553; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
554; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
555; SSE-32-NEXT:    movl %ebp, %esp
556; SSE-32-NEXT:    popl %ebp
557; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
558; SSE-32-NEXT:    retl
559;
560; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
561; SSE-64:       # %bb.0:
562; SSE-64-NEXT:    cvttss2si %xmm0, %rax
563; SSE-64-NEXT:    movq %rax, %xmm1
564; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
565; SSE-64-NEXT:    cvttss2si %xmm0, %rax
566; SSE-64-NEXT:    movq %rax, %xmm0
567; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
568; SSE-64-NEXT:    movdqa %xmm1, %xmm0
569; SSE-64-NEXT:    retq
570;
571; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
572; AVX-32:       # %bb.0:
573; AVX-32-NEXT:    pushl %ebp
574; AVX-32-NEXT:    .cfi_def_cfa_offset 8
575; AVX-32-NEXT:    .cfi_offset %ebp, -8
576; AVX-32-NEXT:    movl %esp, %ebp
577; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
578; AVX-32-NEXT:    andl $-8, %esp
579; AVX-32-NEXT:    subl $16, %esp
580; AVX-32-NEXT:    vmovss %xmm0, (%esp)
581; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
582; AVX-32-NEXT:    flds (%esp)
583; AVX-32-NEXT:    fisttpll (%esp)
584; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
585; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
586; AVX-32-NEXT:    wait
587; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
588; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
589; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
590; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
591; AVX-32-NEXT:    movl %ebp, %esp
592; AVX-32-NEXT:    popl %ebp
593; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
594; AVX-32-NEXT:    retl
595;
596; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
597; AVX-64:       # %bb.0:
598; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
599; AVX-64-NEXT:    vmovq %rax, %xmm1
600; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
601; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
602; AVX-64-NEXT:    vmovq %rax, %xmm0
603; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
604; AVX-64-NEXT:    retq
605;
606; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
607; AVX512F-32:       # %bb.0:
608; AVX512F-32-NEXT:    pushl %ebp
609; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
610; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
611; AVX512F-32-NEXT:    movl %esp, %ebp
612; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
613; AVX512F-32-NEXT:    andl $-8, %esp
614; AVX512F-32-NEXT:    subl $16, %esp
615; AVX512F-32-NEXT:    vmovd %xmm0, (%esp)
616; AVX512F-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
617; AVX512F-32-NEXT:    flds (%esp)
618; AVX512F-32-NEXT:    fisttpll (%esp)
619; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
620; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
621; AVX512F-32-NEXT:    wait
622; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
623; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
624; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
625; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
626; AVX512F-32-NEXT:    movl %ebp, %esp
627; AVX512F-32-NEXT:    popl %ebp
628; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
629; AVX512F-32-NEXT:    retl
630;
631; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
632; AVX512F-64:       # %bb.0:
633; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
634; AVX512F-64-NEXT:    vmovq %rax, %xmm1
635; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
636; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
637; AVX512F-64-NEXT:    vmovq %rax, %xmm0
638; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
639; AVX512F-64-NEXT:    retq
640;
641; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
642; AVX512VL-32:       # %bb.0:
643; AVX512VL-32-NEXT:    pushl %ebp
644; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
645; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
646; AVX512VL-32-NEXT:    movl %esp, %ebp
647; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
648; AVX512VL-32-NEXT:    andl $-8, %esp
649; AVX512VL-32-NEXT:    subl $16, %esp
650; AVX512VL-32-NEXT:    vmovd %xmm0, (%esp)
651; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
652; AVX512VL-32-NEXT:    flds (%esp)
653; AVX512VL-32-NEXT:    fisttpll (%esp)
654; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
655; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
656; AVX512VL-32-NEXT:    wait
657; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
658; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
659; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
660; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
661; AVX512VL-32-NEXT:    movl %ebp, %esp
662; AVX512VL-32-NEXT:    popl %ebp
663; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
664; AVX512VL-32-NEXT:    retl
665;
666; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
667; AVX512VL-64:       # %bb.0:
668; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
669; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
670; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
671; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
672; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
673; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
674; AVX512VL-64-NEXT:    retq
675;
676; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
677; AVX512DQ:       # %bb.0:
678; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
679; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
680; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
681; AVX512DQ-NEXT:    vzeroupper
682; AVX512DQ-NEXT:    ret{{[l|q]}}
683;
684; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
685; AVX512VLDQ:       # %bb.0:
686; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
687; AVX512VLDQ-NEXT:    ret{{[l|q]}}
688  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a,
689                                              metadata !"fpexcept.strict") #0
690  ret <2 x i64> %ret
691}
692
; Strict (fpexcept.strict) fptosi of the low two lanes of a <4 x float> that is
; loaded from %x, producing <2 x i64>. The expected output below shows the load
; being folded into the conversion on some configurations (vcvttps2qq or scalar
; vcvttss2si reading straight from memory), while the 32-bit configurations
; without the avx512dq feature go through the x87 stack (flds followed by
; fistpll or fisttpll).
693define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(ptr %x) strictfp {
694; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
695; SSE-32:       # %bb.0:
696; SSE-32-NEXT:    pushl %ebp
697; SSE-32-NEXT:    .cfi_def_cfa_offset 8
698; SSE-32-NEXT:    .cfi_offset %ebp, -8
699; SSE-32-NEXT:    movl %esp, %ebp
700; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
701; SSE-32-NEXT:    andl $-8, %esp
702; SSE-32-NEXT:    subl $24, %esp
703; SSE-32-NEXT:    movl 8(%ebp), %eax
704; SSE-32-NEXT:    movaps (%eax), %xmm0
705; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
706; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
707; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
708; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
709; SSE-32-NEXT:    wait
710; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
711; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
712; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
713; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
714; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
715; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
716; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
717; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
718; SSE-32-NEXT:    wait
719; SSE-32-NEXT:    fnstcw (%esp)
720; SSE-32-NEXT:    movzwl (%esp), %eax
721; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
722; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
723; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
724; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
725; SSE-32-NEXT:    fldcw (%esp)
726; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
727; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
728; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
729; SSE-32-NEXT:    movl %ebp, %esp
730; SSE-32-NEXT:    popl %ebp
731; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
732; SSE-32-NEXT:    retl
733;
734; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
735; SSE-64:       # %bb.0:
736; SSE-64-NEXT:    movaps (%rdi), %xmm1
737; SSE-64-NEXT:    cvttss2si %xmm1, %rax
738; SSE-64-NEXT:    movq %rax, %xmm0
739; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
740; SSE-64-NEXT:    cvttss2si %xmm1, %rax
741; SSE-64-NEXT:    movq %rax, %xmm1
742; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
743; SSE-64-NEXT:    retq
744;
745; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
746; AVX-32:       # %bb.0:
747; AVX-32-NEXT:    pushl %ebp
748; AVX-32-NEXT:    .cfi_def_cfa_offset 8
749; AVX-32-NEXT:    .cfi_offset %ebp, -8
750; AVX-32-NEXT:    movl %esp, %ebp
751; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
752; AVX-32-NEXT:    andl $-8, %esp
753; AVX-32-NEXT:    subl $16, %esp
754; AVX-32-NEXT:    movl 8(%ebp), %eax
755; AVX-32-NEXT:    vmovaps (%eax), %xmm0
756; AVX-32-NEXT:    vmovss %xmm0, (%esp)
757; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
758; AVX-32-NEXT:    flds (%esp)
759; AVX-32-NEXT:    fisttpll (%esp)
760; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
761; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
762; AVX-32-NEXT:    wait
763; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
764; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
765; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
766; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
767; AVX-32-NEXT:    movl %ebp, %esp
768; AVX-32-NEXT:    popl %ebp
769; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
770; AVX-32-NEXT:    retl
771;
772; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
773; AVX-64:       # %bb.0:
774; AVX-64-NEXT:    vcvttss2si 4(%rdi), %rax
775; AVX-64-NEXT:    vmovq %rax, %xmm0
776; AVX-64-NEXT:    vcvttss2si (%rdi), %rax
777; AVX-64-NEXT:    vmovq %rax, %xmm1
778; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
779; AVX-64-NEXT:    retq
780;
781; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
782; AVX512F-32:       # %bb.0:
783; AVX512F-32-NEXT:    pushl %ebp
784; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
785; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
786; AVX512F-32-NEXT:    movl %esp, %ebp
787; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
788; AVX512F-32-NEXT:    andl $-8, %esp
789; AVX512F-32-NEXT:    subl $16, %esp
790; AVX512F-32-NEXT:    movl 8(%ebp), %eax
791; AVX512F-32-NEXT:    vmovdqa (%eax), %xmm0
792; AVX512F-32-NEXT:    vmovd %xmm0, (%esp)
793; AVX512F-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
794; AVX512F-32-NEXT:    flds (%esp)
795; AVX512F-32-NEXT:    fisttpll (%esp)
796; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
797; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
798; AVX512F-32-NEXT:    wait
799; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
800; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
801; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
802; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
803; AVX512F-32-NEXT:    movl %ebp, %esp
804; AVX512F-32-NEXT:    popl %ebp
805; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
806; AVX512F-32-NEXT:    retl
807;
808; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
809; AVX512F-64:       # %bb.0:
810; AVX512F-64-NEXT:    vcvttss2si 4(%rdi), %rax
811; AVX512F-64-NEXT:    vmovq %rax, %xmm0
812; AVX512F-64-NEXT:    vcvttss2si (%rdi), %rax
813; AVX512F-64-NEXT:    vmovq %rax, %xmm1
814; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
815; AVX512F-64-NEXT:    retq
816;
817; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
818; AVX512VL-32:       # %bb.0:
819; AVX512VL-32-NEXT:    pushl %ebp
820; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
821; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
822; AVX512VL-32-NEXT:    movl %esp, %ebp
823; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
824; AVX512VL-32-NEXT:    andl $-8, %esp
825; AVX512VL-32-NEXT:    subl $16, %esp
826; AVX512VL-32-NEXT:    movl 8(%ebp), %eax
827; AVX512VL-32-NEXT:    vmovdqa (%eax), %xmm0
828; AVX512VL-32-NEXT:    vmovd %xmm0, (%esp)
829; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
830; AVX512VL-32-NEXT:    flds (%esp)
831; AVX512VL-32-NEXT:    fisttpll (%esp)
832; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
833; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
834; AVX512VL-32-NEXT:    wait
835; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
836; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
837; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
838; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
839; AVX512VL-32-NEXT:    movl %ebp, %esp
840; AVX512VL-32-NEXT:    popl %ebp
841; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
842; AVX512VL-32-NEXT:    retl
843;
844; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
845; AVX512VL-64:       # %bb.0:
846; AVX512VL-64-NEXT:    vcvttss2si 4(%rdi), %rax
847; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
848; AVX512VL-64-NEXT:    vcvttss2si (%rdi), %rax
849; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
850; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
851; AVX512VL-64-NEXT:    retq
852;
853; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
854; AVX512DQ-32:       # %bb.0:
855; AVX512DQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
856; AVX512DQ-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
857; AVX512DQ-32-NEXT:    vcvttps2qq %ymm0, %zmm0
858; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
859; AVX512DQ-32-NEXT:    vzeroupper
860; AVX512DQ-32-NEXT:    retl
861;
862; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
863; AVX512DQ-64:       # %bb.0:
864; AVX512DQ-64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
865; AVX512DQ-64-NEXT:    vcvttps2qq %ymm0, %zmm0
866; AVX512DQ-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
867; AVX512DQ-64-NEXT:    vzeroupper
868; AVX512DQ-64-NEXT:    retq
869;
870; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
871; AVX512VLDQ-32:       # %bb.0:
872; AVX512VLDQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
873; AVX512VLDQ-32-NEXT:    vcvttps2qq (%eax), %xmm0
874; AVX512VLDQ-32-NEXT:    retl
875;
876; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
877; AVX512VLDQ-64:       # %bb.0:
878; AVX512VLDQ-64-NEXT:    vcvttps2qq (%rdi), %xmm0
879; AVX512VLDQ-64-NEXT:    retq
880  %a = load <4 x float>, ptr %x
881  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
882  %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
883  ret <2 x i64> %c
884}
885
; Strict (fpexcept.strict) fptoui from <2 x float> to <2 x i64>. Where the
; expected output below has no unsigned conversion instruction, each lane is
; compared against the constant 9.22337203E+18, that constant (or zero when the
; lane is below it) is subtracted, a signed conversion is performed, and the
; comparison result is shifted into the top bit and xor'ed into the converted
; value. Configurations that show vcvttss2usi or vcvttps2uqq convert directly.
886define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
887; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
888; SSE-32:       # %bb.0:
889; SSE-32-NEXT:    pushl %ebp
890; SSE-32-NEXT:    .cfi_def_cfa_offset 8
891; SSE-32-NEXT:    .cfi_offset %ebp, -8
892; SSE-32-NEXT:    movl %esp, %ebp
893; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
894; SSE-32-NEXT:    andl $-8, %esp
895; SSE-32-NEXT:    subl $24, %esp
896; SSE-32-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
897; SSE-32-NEXT:    comiss %xmm1, %xmm0
898; SSE-32-NEXT:    movaps %xmm1, %xmm2
899; SSE-32-NEXT:    jae .LBB4_2
900; SSE-32-NEXT:  # %bb.1:
901; SSE-32-NEXT:    xorps %xmm2, %xmm2
902; SSE-32-NEXT:  .LBB4_2:
903; SSE-32-NEXT:    movaps %xmm0, %xmm3
904; SSE-32-NEXT:    subss %xmm2, %xmm3
905; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
906; SSE-32-NEXT:    setae %al
907; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
908; SSE-32-NEXT:    wait
909; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
910; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
911; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
912; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
913; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
914; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
915; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
916; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
917; SSE-32-NEXT:    comiss %xmm1, %xmm0
918; SSE-32-NEXT:    jae .LBB4_4
919; SSE-32-NEXT:  # %bb.3:
920; SSE-32-NEXT:    xorps %xmm1, %xmm1
921; SSE-32-NEXT:  .LBB4_4:
922; SSE-32-NEXT:    subss %xmm1, %xmm0
923; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
924; SSE-32-NEXT:    setae %cl
925; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
926; SSE-32-NEXT:    wait
927; SSE-32-NEXT:    fnstcw (%esp)
928; SSE-32-NEXT:    movzwl (%esp), %edx
929; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
930; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
931; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
932; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
933; SSE-32-NEXT:    fldcw (%esp)
934; SSE-32-NEXT:    movzbl %al, %eax
935; SSE-32-NEXT:    shll $31, %eax
936; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
937; SSE-32-NEXT:    movd %eax, %xmm1
938; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
939; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
940; SSE-32-NEXT:    movzbl %cl, %eax
941; SSE-32-NEXT:    shll $31, %eax
942; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
943; SSE-32-NEXT:    movd %eax, %xmm1
944; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
945; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
946; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
947; SSE-32-NEXT:    movl %ebp, %esp
948; SSE-32-NEXT:    popl %ebp
949; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
950; SSE-32-NEXT:    retl
951;
952; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
953; SSE-64:       # %bb.0:
954; SSE-64-NEXT:    movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
955; SSE-64-NEXT:    comiss %xmm3, %xmm0
956; SSE-64-NEXT:    xorps %xmm2, %xmm2
957; SSE-64-NEXT:    xorps %xmm1, %xmm1
958; SSE-64-NEXT:    jb .LBB4_2
959; SSE-64-NEXT:  # %bb.1:
960; SSE-64-NEXT:    movaps %xmm3, %xmm1
961; SSE-64-NEXT:  .LBB4_2:
962; SSE-64-NEXT:    movaps %xmm0, %xmm4
963; SSE-64-NEXT:    subss %xmm1, %xmm4
964; SSE-64-NEXT:    cvttss2si %xmm4, %rax
965; SSE-64-NEXT:    setae %cl
966; SSE-64-NEXT:    movzbl %cl, %ecx
967; SSE-64-NEXT:    shlq $63, %rcx
968; SSE-64-NEXT:    xorq %rax, %rcx
969; SSE-64-NEXT:    movq %rcx, %xmm1
970; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
971; SSE-64-NEXT:    comiss %xmm3, %xmm0
972; SSE-64-NEXT:    jb .LBB4_4
973; SSE-64-NEXT:  # %bb.3:
974; SSE-64-NEXT:    movaps %xmm3, %xmm2
975; SSE-64-NEXT:  .LBB4_4:
976; SSE-64-NEXT:    subss %xmm2, %xmm0
977; SSE-64-NEXT:    cvttss2si %xmm0, %rax
978; SSE-64-NEXT:    setae %cl
979; SSE-64-NEXT:    movzbl %cl, %ecx
980; SSE-64-NEXT:    shlq $63, %rcx
981; SSE-64-NEXT:    xorq %rax, %rcx
982; SSE-64-NEXT:    movq %rcx, %xmm0
983; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
984; SSE-64-NEXT:    movdqa %xmm1, %xmm0
985; SSE-64-NEXT:    retq
986;
987; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
988; AVX-32:       # %bb.0:
989; AVX-32-NEXT:    pushl %ebp
990; AVX-32-NEXT:    .cfi_def_cfa_offset 8
991; AVX-32-NEXT:    .cfi_offset %ebp, -8
992; AVX-32-NEXT:    movl %esp, %ebp
993; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
994; AVX-32-NEXT:    andl $-8, %esp
995; AVX-32-NEXT:    subl $16, %esp
996; AVX-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
997; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
998; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
999; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
1000; AVX-32-NEXT:    jae .LBB4_2
1001; AVX-32-NEXT:  # %bb.1:
1002; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1003; AVX-32-NEXT:  .LBB4_2:
1004; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
1005; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
1006; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
1007; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1008; AVX-32-NEXT:    wait
1009; AVX-32-NEXT:    setae %al
1010; AVX-32-NEXT:    movzbl %al, %eax
1011; AVX-32-NEXT:    shll $31, %eax
1012; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1013; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
1014; AVX-32-NEXT:    jae .LBB4_4
1015; AVX-32-NEXT:  # %bb.3:
1016; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1017; AVX-32-NEXT:  .LBB4_4:
1018; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1019; AVX-32-NEXT:    vmovss %xmm0, (%esp)
1020; AVX-32-NEXT:    flds (%esp)
1021; AVX-32-NEXT:    fisttpll (%esp)
1022; AVX-32-NEXT:    wait
1023; AVX-32-NEXT:    setae %cl
1024; AVX-32-NEXT:    movzbl %cl, %ecx
1025; AVX-32-NEXT:    shll $31, %ecx
1026; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1027; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1028; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1029; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1030; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1031; AVX-32-NEXT:    movl %ebp, %esp
1032; AVX-32-NEXT:    popl %ebp
1033; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1034; AVX-32-NEXT:    retl
1035;
1036; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1037; AVX-64:       # %bb.0:
1038; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1039; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
1040; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1041; AVX-64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1042; AVX-64-NEXT:    jb .LBB4_2
1043; AVX-64-NEXT:  # %bb.1:
1044; AVX-64-NEXT:    vmovaps %xmm1, %xmm3
1045; AVX-64-NEXT:  .LBB4_2:
1046; AVX-64-NEXT:    vsubss %xmm3, %xmm0, %xmm3
1047; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
1048; AVX-64-NEXT:    setae %cl
1049; AVX-64-NEXT:    movzbl %cl, %ecx
1050; AVX-64-NEXT:    shlq $63, %rcx
1051; AVX-64-NEXT:    xorq %rax, %rcx
1052; AVX-64-NEXT:    vmovq %rcx, %xmm3
1053; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1054; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
1055; AVX-64-NEXT:    jb .LBB4_4
1056; AVX-64-NEXT:  # %bb.3:
1057; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
1058; AVX-64-NEXT:  .LBB4_4:
1059; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
1060; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
1061; AVX-64-NEXT:    setae %cl
1062; AVX-64-NEXT:    movzbl %cl, %ecx
1063; AVX-64-NEXT:    shlq $63, %rcx
1064; AVX-64-NEXT:    xorq %rax, %rcx
1065; AVX-64-NEXT:    vmovq %rcx, %xmm0
1066; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1067; AVX-64-NEXT:    retq
1068;
1069; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1070; AVX512F-32:       # %bb.0:
1071; AVX512F-32-NEXT:    pushl %ebp
1072; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
1073; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
1074; AVX512F-32-NEXT:    movl %esp, %ebp
1075; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
1076; AVX512F-32-NEXT:    andl $-8, %esp
1077; AVX512F-32-NEXT:    subl $16, %esp
1078; AVX512F-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1079; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1080; AVX512F-32-NEXT:    xorl %eax, %eax
1081; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
1082; AVX512F-32-NEXT:    setae %al
1083; AVX512F-32-NEXT:    kmovw %eax, %k1
1084; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1085; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1086; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1087; AVX512F-32-NEXT:    xorl %ecx, %ecx
1088; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm0
1089; AVX512F-32-NEXT:    setae %cl
1090; AVX512F-32-NEXT:    kmovw %ecx, %k1
1091; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1092; AVX512F-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1093; AVX512F-32-NEXT:    vmovss %xmm0, (%esp)
1094; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
1095; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1096; AVX512F-32-NEXT:    flds (%esp)
1097; AVX512F-32-NEXT:    fisttpll (%esp)
1098; AVX512F-32-NEXT:    wait
1099; AVX512F-32-NEXT:    shll $31, %eax
1100; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1101; AVX512F-32-NEXT:    shll $31, %ecx
1102; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1103; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1104; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1105; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1106; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1107; AVX512F-32-NEXT:    movl %ebp, %esp
1108; AVX512F-32-NEXT:    popl %ebp
1109; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
1110; AVX512F-32-NEXT:    retl
1111;
1112; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1113; AVX512F-64:       # %bb.0:
1114; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
1115; AVX512F-64-NEXT:    vmovq %rax, %xmm1
1116; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1117; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
1118; AVX512F-64-NEXT:    vmovq %rax, %xmm0
1119; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1120; AVX512F-64-NEXT:    retq
1121;
1122; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1123; AVX512VL-32:       # %bb.0:
1124; AVX512VL-32-NEXT:    pushl %ebp
1125; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
1126; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
1127; AVX512VL-32-NEXT:    movl %esp, %ebp
1128; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
1129; AVX512VL-32-NEXT:    andl $-8, %esp
1130; AVX512VL-32-NEXT:    subl $16, %esp
1131; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1132; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1133; AVX512VL-32-NEXT:    xorl %eax, %eax
1134; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
1135; AVX512VL-32-NEXT:    setae %al
1136; AVX512VL-32-NEXT:    kmovw %eax, %k1
1137; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1138; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1139; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1140; AVX512VL-32-NEXT:    xorl %ecx, %ecx
1141; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm0
1142; AVX512VL-32-NEXT:    setae %cl
1143; AVX512VL-32-NEXT:    kmovw %ecx, %k1
1144; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1145; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1146; AVX512VL-32-NEXT:    vmovss %xmm0, (%esp)
1147; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
1148; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1149; AVX512VL-32-NEXT:    flds (%esp)
1150; AVX512VL-32-NEXT:    fisttpll (%esp)
1151; AVX512VL-32-NEXT:    wait
1152; AVX512VL-32-NEXT:    shll $31, %eax
1153; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1154; AVX512VL-32-NEXT:    shll $31, %ecx
1155; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1156; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1157; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1158; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1159; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1160; AVX512VL-32-NEXT:    movl %ebp, %esp
1161; AVX512VL-32-NEXT:    popl %ebp
1162; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
1163; AVX512VL-32-NEXT:    retl
1164;
1165; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1166; AVX512VL-64:       # %bb.0:
1167; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
1168; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
1169; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1170; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
1171; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
1172; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1173; AVX512VL-64-NEXT:    retq
1174;
1175; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1176; AVX512DQ:       # %bb.0:
1177; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1178; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1179; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1180; AVX512DQ-NEXT:    vzeroupper
1181; AVX512DQ-NEXT:    ret{{[l|q]}}
1182;
1183; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1184; AVX512VLDQ:       # %bb.0:
1185; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
1186; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1187  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a,
1188                                              metadata !"fpexcept.strict") #0
1189  ret <2 x i64> %ret
1190}
1191
1192define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(ptr %x) strictfp {
1193; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1194; SSE-32:       # %bb.0:
1195; SSE-32-NEXT:    pushl %ebp
1196; SSE-32-NEXT:    .cfi_def_cfa_offset 8
1197; SSE-32-NEXT:    .cfi_offset %ebp, -8
1198; SSE-32-NEXT:    movl %esp, %ebp
1199; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
1200; SSE-32-NEXT:    andl $-8, %esp
1201; SSE-32-NEXT:    subl $24, %esp
1202; SSE-32-NEXT:    movl 8(%ebp), %eax
1203; SSE-32-NEXT:    movaps (%eax), %xmm0
1204; SSE-32-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1205; SSE-32-NEXT:    comiss %xmm1, %xmm0
1206; SSE-32-NEXT:    movaps %xmm1, %xmm2
1207; SSE-32-NEXT:    jae .LBB5_2
1208; SSE-32-NEXT:  # %bb.1:
1209; SSE-32-NEXT:    xorps %xmm2, %xmm2
1210; SSE-32-NEXT:  .LBB5_2:
1211; SSE-32-NEXT:    movaps %xmm0, %xmm3
1212; SSE-32-NEXT:    subss %xmm2, %xmm3
1213; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
1214; SSE-32-NEXT:    setae %al
1215; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
1216; SSE-32-NEXT:    wait
1217; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
1218; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
1219; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
1220; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
1221; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
1222; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
1223; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
1224; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1225; SSE-32-NEXT:    comiss %xmm1, %xmm0
1226; SSE-32-NEXT:    jae .LBB5_4
1227; SSE-32-NEXT:  # %bb.3:
1228; SSE-32-NEXT:    xorps %xmm1, %xmm1
1229; SSE-32-NEXT:  .LBB5_4:
1230; SSE-32-NEXT:    subss %xmm1, %xmm0
1231; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
1232; SSE-32-NEXT:    setae %cl
1233; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
1234; SSE-32-NEXT:    wait
1235; SSE-32-NEXT:    fnstcw (%esp)
1236; SSE-32-NEXT:    movzwl (%esp), %edx
1237; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
1238; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
1239; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
1240; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
1241; SSE-32-NEXT:    fldcw (%esp)
1242; SSE-32-NEXT:    movzbl %al, %eax
1243; SSE-32-NEXT:    shll $31, %eax
1244; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1245; SSE-32-NEXT:    movd %eax, %xmm1
1246; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1247; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1248; SSE-32-NEXT:    movzbl %cl, %eax
1249; SSE-32-NEXT:    shll $31, %eax
1250; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1251; SSE-32-NEXT:    movd %eax, %xmm1
1252; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
1253; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1254; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1255; SSE-32-NEXT:    movl %ebp, %esp
1256; SSE-32-NEXT:    popl %ebp
1257; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
1258; SSE-32-NEXT:    retl
1259;
1260; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1261; SSE-64:       # %bb.0:
1262; SSE-64-NEXT:    movaps (%rdi), %xmm1
1263; SSE-64-NEXT:    movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1264; SSE-64-NEXT:    comiss %xmm3, %xmm1
1265; SSE-64-NEXT:    xorps %xmm2, %xmm2
1266; SSE-64-NEXT:    xorps %xmm0, %xmm0
1267; SSE-64-NEXT:    jb .LBB5_2
1268; SSE-64-NEXT:  # %bb.1:
1269; SSE-64-NEXT:    movaps %xmm3, %xmm0
1270; SSE-64-NEXT:  .LBB5_2:
1271; SSE-64-NEXT:    movaps %xmm1, %xmm4
1272; SSE-64-NEXT:    subss %xmm0, %xmm4
1273; SSE-64-NEXT:    cvttss2si %xmm4, %rax
1274; SSE-64-NEXT:    setae %cl
1275; SSE-64-NEXT:    movzbl %cl, %ecx
1276; SSE-64-NEXT:    shlq $63, %rcx
1277; SSE-64-NEXT:    xorq %rax, %rcx
1278; SSE-64-NEXT:    movq %rcx, %xmm0
1279; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1280; SSE-64-NEXT:    comiss %xmm3, %xmm1
1281; SSE-64-NEXT:    jb .LBB5_4
1282; SSE-64-NEXT:  # %bb.3:
1283; SSE-64-NEXT:    movaps %xmm3, %xmm2
1284; SSE-64-NEXT:  .LBB5_4:
1285; SSE-64-NEXT:    subss %xmm2, %xmm1
1286; SSE-64-NEXT:    cvttss2si %xmm1, %rax
1287; SSE-64-NEXT:    setae %cl
1288; SSE-64-NEXT:    movzbl %cl, %ecx
1289; SSE-64-NEXT:    shlq $63, %rcx
1290; SSE-64-NEXT:    xorq %rax, %rcx
1291; SSE-64-NEXT:    movq %rcx, %xmm1
1292; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1293; SSE-64-NEXT:    retq
1294;
1295; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1296; AVX-32:       # %bb.0:
1297; AVX-32-NEXT:    pushl %ebp
1298; AVX-32-NEXT:    .cfi_def_cfa_offset 8
1299; AVX-32-NEXT:    .cfi_offset %ebp, -8
1300; AVX-32-NEXT:    movl %esp, %ebp
1301; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
1302; AVX-32-NEXT:    andl $-8, %esp
1303; AVX-32-NEXT:    subl $16, %esp
1304; AVX-32-NEXT:    movl 8(%ebp), %eax
1305; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1306; AVX-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1307; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1308; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
1309; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
1310; AVX-32-NEXT:    jae .LBB5_2
1311; AVX-32-NEXT:  # %bb.1:
1312; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1313; AVX-32-NEXT:  .LBB5_2:
1314; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
1315; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
1316; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
1317; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1318; AVX-32-NEXT:    wait
1319; AVX-32-NEXT:    setae %al
1320; AVX-32-NEXT:    movzbl %al, %eax
1321; AVX-32-NEXT:    shll $31, %eax
1322; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1323; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
1324; AVX-32-NEXT:    jae .LBB5_4
1325; AVX-32-NEXT:  # %bb.3:
1326; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1327; AVX-32-NEXT:  .LBB5_4:
1328; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1329; AVX-32-NEXT:    vmovss %xmm0, (%esp)
1330; AVX-32-NEXT:    flds (%esp)
1331; AVX-32-NEXT:    fisttpll (%esp)
1332; AVX-32-NEXT:    wait
1333; AVX-32-NEXT:    setae %cl
1334; AVX-32-NEXT:    movzbl %cl, %ecx
1335; AVX-32-NEXT:    shll $31, %ecx
1336; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1337; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1338; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1339; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1340; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1341; AVX-32-NEXT:    movl %ebp, %esp
1342; AVX-32-NEXT:    popl %ebp
1343; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1344; AVX-32-NEXT:    retl
1345;
1346; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1347; AVX-64:       # %bb.0:
1348; AVX-64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1349; AVX-64-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1350; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1351; AVX-64-NEXT:    vcomiss %xmm1, %xmm3
1352; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1353; AVX-64-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1354; AVX-64-NEXT:    jb .LBB5_2
1355; AVX-64-NEXT:  # %bb.1:
1356; AVX-64-NEXT:    vmovaps %xmm1, %xmm4
1357; AVX-64-NEXT:  .LBB5_2:
1358; AVX-64-NEXT:    vsubss %xmm4, %xmm3, %xmm3
1359; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
1360; AVX-64-NEXT:    setae %cl
1361; AVX-64-NEXT:    movzbl %cl, %ecx
1362; AVX-64-NEXT:    shlq $63, %rcx
1363; AVX-64-NEXT:    xorq %rax, %rcx
1364; AVX-64-NEXT:    vmovq %rcx, %xmm3
1365; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
1366; AVX-64-NEXT:    jb .LBB5_4
1367; AVX-64-NEXT:  # %bb.3:
1368; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
1369; AVX-64-NEXT:  .LBB5_4:
1370; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
1371; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
1372; AVX-64-NEXT:    setae %cl
1373; AVX-64-NEXT:    movzbl %cl, %ecx
1374; AVX-64-NEXT:    shlq $63, %rcx
1375; AVX-64-NEXT:    xorq %rax, %rcx
1376; AVX-64-NEXT:    vmovq %rcx, %xmm0
1377; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1378; AVX-64-NEXT:    retq
1379;
1380; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1381; AVX512F-32:       # %bb.0:
1382; AVX512F-32-NEXT:    pushl %ebp
1383; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
1384; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
1385; AVX512F-32-NEXT:    movl %esp, %ebp
1386; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
1387; AVX512F-32-NEXT:    andl $-8, %esp
1388; AVX512F-32-NEXT:    subl $16, %esp
1389; AVX512F-32-NEXT:    movl 8(%ebp), %eax
1390; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1391; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1392; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1393; AVX512F-32-NEXT:    xorl %eax, %eax
1394; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
1395; AVX512F-32-NEXT:    setae %al
1396; AVX512F-32-NEXT:    kmovw %eax, %k1
1397; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1398; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1399; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1400; AVX512F-32-NEXT:    xorl %ecx, %ecx
1401; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm0
1402; AVX512F-32-NEXT:    setae %cl
1403; AVX512F-32-NEXT:    kmovw %ecx, %k1
1404; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1405; AVX512F-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1406; AVX512F-32-NEXT:    vmovss %xmm0, (%esp)
1407; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
1408; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1409; AVX512F-32-NEXT:    flds (%esp)
1410; AVX512F-32-NEXT:    fisttpll (%esp)
1411; AVX512F-32-NEXT:    wait
1412; AVX512F-32-NEXT:    shll $31, %eax
1413; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1414; AVX512F-32-NEXT:    shll $31, %ecx
1415; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1416; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1417; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1418; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1419; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1420; AVX512F-32-NEXT:    movl %ebp, %esp
1421; AVX512F-32-NEXT:    popl %ebp
1422; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
1423; AVX512F-32-NEXT:    retl
1424;
1425; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1426; AVX512F-64:       # %bb.0:
1427; AVX512F-64-NEXT:    vcvttss2usi 4(%rdi), %rax
1428; AVX512F-64-NEXT:    vmovq %rax, %xmm0
1429; AVX512F-64-NEXT:    vcvttss2usi (%rdi), %rax
1430; AVX512F-64-NEXT:    vmovq %rax, %xmm1
1431; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1432; AVX512F-64-NEXT:    retq
1433;
1434; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1435; AVX512VL-32:       # %bb.0:
1436; AVX512VL-32-NEXT:    pushl %ebp
1437; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
1438; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
1439; AVX512VL-32-NEXT:    movl %esp, %ebp
1440; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
1441; AVX512VL-32-NEXT:    andl $-8, %esp
1442; AVX512VL-32-NEXT:    subl $16, %esp
1443; AVX512VL-32-NEXT:    movl 8(%ebp), %eax
1444; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1446; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
1447; AVX512VL-32-NEXT:    xorl %eax, %eax
1448; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
1449; AVX512VL-32-NEXT:    setae %al
1450; AVX512VL-32-NEXT:    kmovw %eax, %k1
1451; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1452; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1453; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1454; AVX512VL-32-NEXT:    xorl %ecx, %ecx
1455; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm0
1456; AVX512VL-32-NEXT:    setae %cl
1457; AVX512VL-32-NEXT:    kmovw %ecx, %k1
1458; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1459; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1460; AVX512VL-32-NEXT:    vmovss %xmm0, (%esp)
1461; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
1462; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1463; AVX512VL-32-NEXT:    flds (%esp)
1464; AVX512VL-32-NEXT:    fisttpll (%esp)
1465; AVX512VL-32-NEXT:    wait
1466; AVX512VL-32-NEXT:    shll $31, %eax
1467; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1468; AVX512VL-32-NEXT:    shll $31, %ecx
1469; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1470; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1471; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
1472; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1473; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1474; AVX512VL-32-NEXT:    movl %ebp, %esp
1475; AVX512VL-32-NEXT:    popl %ebp
1476; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
1477; AVX512VL-32-NEXT:    retl
1478;
1479; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1480; AVX512VL-64:       # %bb.0:
1481; AVX512VL-64-NEXT:    vcvttss2usi 4(%rdi), %rax
1482; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
1483; AVX512VL-64-NEXT:    vcvttss2usi (%rdi), %rax
1484; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
1485; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1486; AVX512VL-64-NEXT:    retq
1487;
1488; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1489; AVX512DQ-32:       # %bb.0:
1490; AVX512DQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1491; AVX512DQ-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1492; AVX512DQ-32-NEXT:    vcvttps2uqq %ymm0, %zmm0
1493; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1494; AVX512DQ-32-NEXT:    vzeroupper
1495; AVX512DQ-32-NEXT:    retl
1496;
1497; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1498; AVX512DQ-64:       # %bb.0:
1499; AVX512DQ-64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1500; AVX512DQ-64-NEXT:    vcvttps2uqq %ymm0, %zmm0
1501; AVX512DQ-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1502; AVX512DQ-64-NEXT:    vzeroupper
1503; AVX512DQ-64-NEXT:    retq
1504;
1505; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1506; AVX512VLDQ-32:       # %bb.0:
1507; AVX512VLDQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1508; AVX512VLDQ-32-NEXT:    vcvttps2uqq (%eax), %xmm0
1509; AVX512VLDQ-32-NEXT:    retl
1510;
1511; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1512; AVX512VLDQ-64:       # %bb.0:
1513; AVX512VLDQ-64-NEXT:    vcvttps2uqq (%rdi), %xmm0
1514; AVX512VLDQ-64-NEXT:    retq
1515  %a = load <4 x float>, ptr %x
1516  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1517  %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
1518  ret <2 x i64> %c
1519}
1520
; Strict-FP signed conversion of <2 x double> to <2 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every checked configuration expects a single (v)cvttpd2dq on %xmm0
; followed directly by the return.
1521define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
1522; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1523; SSE-32:       # %bb.0:
1524; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
1525; SSE-32-NEXT:    retl
1526;
1527; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1528; SSE-64:       # %bb.0:
1529; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
1530; SSE-64-NEXT:    retq
1531;
1532; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1533; AVX:       # %bb.0:
1534; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
1535; AVX-NEXT:    ret{{[l|q]}}
1536;
1537; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1538; AVX512F:       # %bb.0:
1539; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
1540; AVX512F-NEXT:    ret{{[l|q]}}
1541;
1542; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1543; AVX512VL:       # %bb.0:
1544; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
1545; AVX512VL-NEXT:    ret{{[l|q]}}
1546;
1547; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1548; AVX512DQ:       # %bb.0:
1549; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1550; AVX512DQ-NEXT:    ret{{[l|q]}}
1551;
1552; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1553; AVX512VLDQ:       # %bb.0:
1554; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1555; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1556  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a,
1557                                              metadata !"fpexcept.strict") #0
1558  ret <2 x i32> %ret
1559}
1560
; Strict-FP unsigned conversion of <2 x double> to <2 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; The expected lowering differs per configuration -
;  * SSE2 on i686 handles each element separately by comparing it against
;    the 2.147483648E+9 constant, conditionally subtracting that constant,
;    converting with cvttsd2si and xor-ing in the comparison result shifted
;    up to bit 31.
;  * SSE2 and AVX on x86_64 convert each element with (v)cvttsd2si into a
;    64-bit register and insert only the low 32 bits into the result.
;  * AVX on i686 spills both elements to an 8-byte aligned stack area and
;    converts them with x87 fldl/fisttpll.
;  * AVX512F and AVX512DQ without VL run vcvttpd2udq on %zmm0 after a
;    vmovaps %xmm0, %xmm0 (presumably to clear the upper lanes so that they
;    cannot raise spurious exceptions; confirm against the X86 lowering).
;  * Configurations with AVX512VL use a single 128-bit vcvttpd2udq.
1561define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
1562; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1563; SSE-32:       # %bb.0:
1564; SSE-32-NEXT:    movsd {{.*#+}} xmm3 = [2.147483648E+9,0.0E+0]
1565; SSE-32-NEXT:    comisd %xmm3, %xmm0
1566; SSE-32-NEXT:    xorpd %xmm2, %xmm2
1567; SSE-32-NEXT:    xorpd %xmm1, %xmm1
1568; SSE-32-NEXT:    jb .LBB7_2
1569; SSE-32-NEXT:  # %bb.1:
1570; SSE-32-NEXT:    movapd %xmm3, %xmm1
1571; SSE-32-NEXT:  .LBB7_2:
1572; SSE-32-NEXT:    setae %al
1573; SSE-32-NEXT:    movzbl %al, %eax
1574; SSE-32-NEXT:    shll $31, %eax
1575; SSE-32-NEXT:    movapd %xmm0, %xmm4
1576; SSE-32-NEXT:    subsd %xmm1, %xmm4
1577; SSE-32-NEXT:    cvttsd2si %xmm4, %ecx
1578; SSE-32-NEXT:    xorl %eax, %ecx
1579; SSE-32-NEXT:    movd %ecx, %xmm1
1580; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
1581; SSE-32-NEXT:    comisd %xmm3, %xmm0
1582; SSE-32-NEXT:    jb .LBB7_4
1583; SSE-32-NEXT:  # %bb.3:
1584; SSE-32-NEXT:    movapd %xmm3, %xmm2
1585; SSE-32-NEXT:  .LBB7_4:
1586; SSE-32-NEXT:    setae %al
1587; SSE-32-NEXT:    movzbl %al, %eax
1588; SSE-32-NEXT:    shll $31, %eax
1589; SSE-32-NEXT:    subsd %xmm2, %xmm0
1590; SSE-32-NEXT:    cvttsd2si %xmm0, %ecx
1591; SSE-32-NEXT:    xorl %eax, %ecx
1592; SSE-32-NEXT:    movd %ecx, %xmm0
1593; SSE-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1594; SSE-32-NEXT:    movdqa %xmm1, %xmm0
1595; SSE-32-NEXT:    retl
1596;
1597; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1598; SSE-64:       # %bb.0:
1599; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
1600; SSE-64-NEXT:    movd %eax, %xmm1
1601; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
1602; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
1603; SSE-64-NEXT:    movd %eax, %xmm0
1604; SSE-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1605; SSE-64-NEXT:    movdqa %xmm1, %xmm0
1606; SSE-64-NEXT:    retq
1607;
1608; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1609; AVX-32:       # %bb.0:
1610; AVX-32-NEXT:    pushl %ebp
1611; AVX-32-NEXT:    .cfi_def_cfa_offset 8
1612; AVX-32-NEXT:    .cfi_offset %ebp, -8
1613; AVX-32-NEXT:    movl %esp, %ebp
1614; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
1615; AVX-32-NEXT:    andl $-8, %esp
1616; AVX-32-NEXT:    subl $16, %esp
1617; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
1618; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
1619; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
1620; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1621; AVX-32-NEXT:    fldl (%esp)
1622; AVX-32-NEXT:    fisttpll (%esp)
1623; AVX-32-NEXT:    wait
1624; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1625; AVX-32-NEXT:    vpinsrd $1, (%esp), %xmm0, %xmm0
1626; AVX-32-NEXT:    movl %ebp, %esp
1627; AVX-32-NEXT:    popl %ebp
1628; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1629; AVX-32-NEXT:    retl
1630;
1631; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1632; AVX-64:       # %bb.0:
1633; AVX-64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
1634; AVX-64-NEXT:    vcvttsd2si %xmm1, %rax
1635; AVX-64-NEXT:    vcvttsd2si %xmm0, %rcx
1636; AVX-64-NEXT:    vmovd %ecx, %xmm0
1637; AVX-64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
1638; AVX-64-NEXT:    retq
1639;
1640; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1641; AVX512F:       # %bb.0:
1642; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
1643; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
1644; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1645; AVX512F-NEXT:    vzeroupper
1646; AVX512F-NEXT:    ret{{[l|q]}}
1647;
1648; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1649; AVX512VL:       # %bb.0:
1650; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
1651; AVX512VL-NEXT:    ret{{[l|q]}}
1652;
1653; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1654; AVX512DQ:       # %bb.0:
1655; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
1656; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
1657; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1658; AVX512DQ-NEXT:    vzeroupper
1659; AVX512DQ-NEXT:    ret{{[l|q]}}
1660;
1661; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1662; AVX512VLDQ:       # %bb.0:
1663; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
1664; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1665  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a,
1666                                              metadata !"fpexcept.strict") #0
1667  ret <2 x i32> %ret
1668}
1669
; Strict-FP signed conversion of <2 x float> to <2 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every checked configuration expects a (v)movq that keeps the low 64 bits
; of %xmm0 and zeroes the rest, followed by a 128-bit (v)cvttps2dq.
; Presumably the zeroing keeps the unused upper two float lanes from raising
; spurious exceptions; confirm against the X86 lowering.
1670define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
1671; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1672; SSE-32:       # %bb.0:
1673; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1674; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
1675; SSE-32-NEXT:    retl
1676;
1677; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1678; SSE-64:       # %bb.0:
1679; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1680; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
1681; SSE-64-NEXT:    retq
1682;
1683; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1684; AVX:       # %bb.0:
1685; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1686; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
1687; AVX-NEXT:    ret{{[l|q]}}
1688;
1689; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1690; AVX512F:       # %bb.0:
1691; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1692; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
1693; AVX512F-NEXT:    ret{{[l|q]}}
1694;
1695; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1696; AVX512VL:       # %bb.0:
1697; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1698; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
1699; AVX512VL-NEXT:    ret{{[l|q]}}
1700;
1701; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1702; AVX512DQ:       # %bb.0:
1703; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1704; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
1705; AVX512DQ-NEXT:    ret{{[l|q]}}
1706;
1707; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1708; AVX512VLDQ:       # %bb.0:
1709; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1710; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
1711; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1712  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
1713                                              metadata !"fpexcept.strict") #0
1714  ret <2 x i32> %ret
1715}
1716
; Strict-FP unsigned conversion of <2 x float> to <2 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; The expected lowering differs per configuration -
;  * SSE2 on i686 handles each element separately by comparing it against
;    the 2.14748365E+9 constant, conditionally subtracting that constant,
;    converting with cvttss2si and xor-ing in the comparison result shifted
;    up to bit 31.
;  * SSE2 and AVX on x86_64 convert each element with (v)cvttss2si into a
;    64-bit register and insert only the low 32 bits into the result.
;  * AVX on i686 spills both elements to an 8-byte aligned stack area and
;    converts them with x87 flds/fisttpll.
;  * AVX512F and AVX512DQ without VL zero the upper half of %xmm0 with vmovq
;    and then run vcvttps2udq on %zmm0.
;  * Configurations with AVX512VL use the same vmovq followed by a 128-bit
;    vcvttps2udq.
1717define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
1718; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1719; SSE-32:       # %bb.0:
1720; SSE-32-NEXT:    movss {{.*#+}} xmm3 = [2.14748365E+9,0.0E+0,0.0E+0,0.0E+0]
1721; SSE-32-NEXT:    comiss %xmm3, %xmm0
1722; SSE-32-NEXT:    xorps %xmm2, %xmm2
1723; SSE-32-NEXT:    xorps %xmm1, %xmm1
1724; SSE-32-NEXT:    jb .LBB9_2
1725; SSE-32-NEXT:  # %bb.1:
1726; SSE-32-NEXT:    movaps %xmm3, %xmm1
1727; SSE-32-NEXT:  .LBB9_2:
1728; SSE-32-NEXT:    setae %al
1729; SSE-32-NEXT:    movzbl %al, %eax
1730; SSE-32-NEXT:    shll $31, %eax
1731; SSE-32-NEXT:    movaps %xmm0, %xmm4
1732; SSE-32-NEXT:    subss %xmm1, %xmm4
1733; SSE-32-NEXT:    cvttss2si %xmm4, %ecx
1734; SSE-32-NEXT:    xorl %eax, %ecx
1735; SSE-32-NEXT:    movd %ecx, %xmm1
1736; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1737; SSE-32-NEXT:    comiss %xmm3, %xmm0
1738; SSE-32-NEXT:    jb .LBB9_4
1739; SSE-32-NEXT:  # %bb.3:
1740; SSE-32-NEXT:    movaps %xmm3, %xmm2
1741; SSE-32-NEXT:  .LBB9_4:
1742; SSE-32-NEXT:    setae %al
1743; SSE-32-NEXT:    movzbl %al, %eax
1744; SSE-32-NEXT:    shll $31, %eax
1745; SSE-32-NEXT:    subss %xmm2, %xmm0
1746; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
1747; SSE-32-NEXT:    xorl %eax, %ecx
1748; SSE-32-NEXT:    movd %ecx, %xmm0
1749; SSE-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1750; SSE-32-NEXT:    movdqa %xmm1, %xmm0
1751; SSE-32-NEXT:    retl
1752;
1753; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1754; SSE-64:       # %bb.0:
1755; SSE-64-NEXT:    cvttss2si %xmm0, %rax
1756; SSE-64-NEXT:    movd %eax, %xmm1
1757; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1758; SSE-64-NEXT:    cvttss2si %xmm0, %rax
1759; SSE-64-NEXT:    movd %eax, %xmm0
1760; SSE-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1761; SSE-64-NEXT:    movdqa %xmm1, %xmm0
1762; SSE-64-NEXT:    retq
1763;
1764; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1765; AVX-32:       # %bb.0:
1766; AVX-32-NEXT:    pushl %ebp
1767; AVX-32-NEXT:    .cfi_def_cfa_offset 8
1768; AVX-32-NEXT:    .cfi_offset %ebp, -8
1769; AVX-32-NEXT:    movl %esp, %ebp
1770; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
1771; AVX-32-NEXT:    andl $-8, %esp
1772; AVX-32-NEXT:    subl $16, %esp
1773; AVX-32-NEXT:    vmovss %xmm0, (%esp)
1774; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
1775; AVX-32-NEXT:    flds (%esp)
1776; AVX-32-NEXT:    fisttpll (%esp)
1777; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
1778; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1779; AVX-32-NEXT:    wait
1780; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1781; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
1782; AVX-32-NEXT:    movl %ebp, %esp
1783; AVX-32-NEXT:    popl %ebp
1784; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1785; AVX-32-NEXT:    retl
1786;
1787; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1788; AVX-64:       # %bb.0:
1789; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1790; AVX-64-NEXT:    vcvttss2si %xmm1, %rax
1791; AVX-64-NEXT:    vcvttss2si %xmm0, %rcx
1792; AVX-64-NEXT:    vmovd %ecx, %xmm0
1793; AVX-64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
1794; AVX-64-NEXT:    retq
1795;
1796; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1797; AVX512F:       # %bb.0:
1798; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1799; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1800; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1801; AVX512F-NEXT:    vzeroupper
1802; AVX512F-NEXT:    ret{{[l|q]}}
1803;
1804; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1805; AVX512VL:       # %bb.0:
1806; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1807; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
1808; AVX512VL-NEXT:    ret{{[l|q]}}
1809;
1810; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1811; AVX512DQ:       # %bb.0:
1812; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1813; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1814; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1815; AVX512DQ-NEXT:    vzeroupper
1816; AVX512DQ-NEXT:    ret{{[l|q]}}
1817;
1818; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1819; AVX512VLDQ:       # %bb.0:
1820; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1821; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
1822; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1823  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
1824                                              metadata !"fpexcept.strict") #0
1825  ret <2 x i32> %ret
1826}
1827
; Strict-FP signed conversion of <2 x double> to <2 x i16> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every checked configuration expects a 32-bit (v)cvttpd2dq conversion
; followed by a (v)packssdw that narrows the dwords to words.
1828define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
1829; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1830; SSE-32:       # %bb.0:
1831; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
1832; SSE-32-NEXT:    packssdw %xmm0, %xmm0
1833; SSE-32-NEXT:    retl
1834;
1835; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1836; SSE-64:       # %bb.0:
1837; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
1838; SSE-64-NEXT:    packssdw %xmm0, %xmm0
1839; SSE-64-NEXT:    retq
1840;
1841; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1842; AVX:       # %bb.0:
1843; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
1844; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1845; AVX-NEXT:    ret{{[l|q]}}
1846;
1847; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1848; AVX512F:       # %bb.0:
1849; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
1850; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1851; AVX512F-NEXT:    ret{{[l|q]}}
1852;
1853; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1854; AVX512VL:       # %bb.0:
1855; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
1856; AVX512VL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1857; AVX512VL-NEXT:    ret{{[l|q]}}
1858;
1859; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1860; AVX512DQ:       # %bb.0:
1861; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1862; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1863; AVX512DQ-NEXT:    ret{{[l|q]}}
1864;
1865; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1866; AVX512VLDQ:       # %bb.0:
1867; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1868; AVX512VLDQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1869; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1870  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
1871                                              metadata !"fpexcept.strict") #0
1872  ret <2 x i16> %ret
1873}
1874
; Strict-FP unsigned conversion of <2 x double> to <2 x i16> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; All configurations are expected to reuse the signed 32-bit (v)cvttpd2dq
; conversion and then narrow the result. The SSE2 runs narrow with a pshuflw
; that selects words 0 and 2, while the AVX and AVX512 runs narrow with
; vpackusdw.
1875define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
1876; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1877; SSE-32:       # %bb.0:
1878; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
1879; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1880; SSE-32-NEXT:    retl
1881;
1882; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1883; SSE-64:       # %bb.0:
1884; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
1885; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1886; SSE-64-NEXT:    retq
1887;
1888; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1889; AVX:       # %bb.0:
1890; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
1891; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1892; AVX-NEXT:    ret{{[l|q]}}
1893;
1894; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1895; AVX512F:       # %bb.0:
1896; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
1897; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1898; AVX512F-NEXT:    ret{{[l|q]}}
1899;
1900; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1901; AVX512VL:       # %bb.0:
1902; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
1903; AVX512VL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1904; AVX512VL-NEXT:    ret{{[l|q]}}
1905;
1906; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1907; AVX512DQ:       # %bb.0:
1908; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1909; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1910; AVX512DQ-NEXT:    ret{{[l|q]}}
1911;
1912; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1913; AVX512VLDQ:       # %bb.0:
1914; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
1915; AVX512VLDQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1916; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1917  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
1918                                              metadata !"fpexcept.strict") #0
1919  ret <2 x i16> %ret
1920}
1921
; Strict-FP signed conversion of <2 x float> to <2 x i16> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every checked configuration expects three steps - a (v)movq that zeroes
; the upper half of %xmm0, a 32-bit (v)cvttps2dq conversion, and a
; (v)packssdw that narrows the dwords to words.
1922define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
1923; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1924; SSE-32:       # %bb.0:
1925; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1926; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
1927; SSE-32-NEXT:    packssdw %xmm0, %xmm0
1928; SSE-32-NEXT:    retl
1929;
1930; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1931; SSE-64:       # %bb.0:
1932; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1933; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
1934; SSE-64-NEXT:    packssdw %xmm0, %xmm0
1935; SSE-64-NEXT:    retq
1936;
1937; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1938; AVX:       # %bb.0:
1939; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1940; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
1941; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1942; AVX-NEXT:    ret{{[l|q]}}
1943;
1944; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1945; AVX512F:       # %bb.0:
1946; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1947; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
1948; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1949; AVX512F-NEXT:    ret{{[l|q]}}
1950;
1951; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1952; AVX512VL:       # %bb.0:
1953; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1954; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
1955; AVX512VL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1956; AVX512VL-NEXT:    ret{{[l|q]}}
1957;
1958; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1959; AVX512DQ:       # %bb.0:
1960; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1961; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
1962; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1963; AVX512DQ-NEXT:    ret{{[l|q]}}
1964;
1965; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1966; AVX512VLDQ:       # %bb.0:
1967; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1968; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
1969; AVX512VLDQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1970; AVX512VLDQ-NEXT:    ret{{[l|q]}}
1971  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
1972                                              metadata !"fpexcept.strict") #0
1973  ret <2 x i16> %ret
1974}
1975
; Strict-FP unsigned conversion of <2 x float> to <2 x i16> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; All configurations are expected to zero the upper half of %xmm0 with
; (v)movq, reuse the signed 32-bit (v)cvttps2dq conversion, and then narrow.
; The SSE2 runs narrow with a pshuflw that selects words 0 and 2, while the
; AVX and AVX512 runs narrow with vpackusdw.
1976define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
1977; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1978; SSE-32:       # %bb.0:
1979; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1980; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
1981; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1982; SSE-32-NEXT:    retl
1983;
1984; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1985; SSE-64:       # %bb.0:
1986; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1987; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
1988; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1989; SSE-64-NEXT:    retq
1990;
1991; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1992; AVX:       # %bb.0:
1993; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1994; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
1995; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1996; AVX-NEXT:    ret{{[l|q]}}
1997;
1998; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1999; AVX512F:       # %bb.0:
2000; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2001; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2002; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2003; AVX512F-NEXT:    ret{{[l|q]}}
2004;
2005; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2006; AVX512VL:       # %bb.0:
2007; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2008; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2009; AVX512VL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2010; AVX512VL-NEXT:    ret{{[l|q]}}
2011;
2012; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2013; AVX512DQ:       # %bb.0:
2014; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2015; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2016; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2017; AVX512DQ-NEXT:    ret{{[l|q]}}
2018;
2019; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2020; AVX512VLDQ:       # %bb.0:
2021; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2022; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2023; AVX512VLDQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2024; AVX512VLDQ-NEXT:    ret{{[l|q]}}
2025  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
2026                                              metadata !"fpexcept.strict") #0
2027  ret <2 x i16> %ret
2028}
2029
; Strict-FP signed conversion of <2 x double> to <2 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; All configurations are expected to start with a 32-bit (v)cvttpd2dq.
; The runs without AVX512VL then narrow in two steps with (v)packssdw and
; (v)packsswb, while the runs with AVX512VL narrow with a single vpmovdb.
2030define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
2031; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2032; SSE-32:       # %bb.0:
2033; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
2034; SSE-32-NEXT:    packssdw %xmm0, %xmm0
2035; SSE-32-NEXT:    packsswb %xmm0, %xmm0
2036; SSE-32-NEXT:    retl
2037;
2038; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2039; SSE-64:       # %bb.0:
2040; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
2041; SSE-64-NEXT:    packssdw %xmm0, %xmm0
2042; SSE-64-NEXT:    packsswb %xmm0, %xmm0
2043; SSE-64-NEXT:    retq
2044;
2045; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2046; AVX:       # %bb.0:
2047; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2048; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2049; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2050; AVX-NEXT:    ret{{[l|q]}}
2051;
2052; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2053; AVX512F:       # %bb.0:
2054; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2055; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2056; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2057; AVX512F-NEXT:    ret{{[l|q]}}
2058;
2059; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2060; AVX512VL:       # %bb.0:
2061; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2062; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2063; AVX512VL-NEXT:    ret{{[l|q]}}
2064;
2065; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2066; AVX512DQ:       # %bb.0:
2067; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2068; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2069; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2070; AVX512DQ-NEXT:    ret{{[l|q]}}
2071;
2072; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2073; AVX512VLDQ:       # %bb.0:
2074; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2075; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2076; AVX512VLDQ-NEXT:    ret{{[l|q]}}
2077  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a,
2078                                              metadata !"fpexcept.strict") #0
2079  ret <2 x i8> %ret
2080}
2081
; Strict-FP unsigned conversion of <2 x double> to <2 x i8> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; All configurations are expected to reuse the signed 32-bit (v)cvttpd2dq
; conversion and then narrow the result -
;  * the SSE2 runs apply packuswb twice,
;  * the AVX, AVX512F and AVX512DQ runs apply vpackusdw then vpackuswb,
;  * the runs with AVX512VL apply a single vpmovdb.
2082define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
2083; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2084; SSE-32:       # %bb.0:
2085; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
2086; SSE-32-NEXT:    packuswb %xmm0, %xmm0
2087; SSE-32-NEXT:    packuswb %xmm0, %xmm0
2088; SSE-32-NEXT:    retl
2089;
2090; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2091; SSE-64:       # %bb.0:
2092; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
2093; SSE-64-NEXT:    packuswb %xmm0, %xmm0
2094; SSE-64-NEXT:    packuswb %xmm0, %xmm0
2095; SSE-64-NEXT:    retq
2096;
2097; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2098; AVX:       # %bb.0:
2099; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2100; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2101; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2102; AVX-NEXT:    ret{{[l|q]}}
2103;
2104; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2105; AVX512F:       # %bb.0:
2106; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2107; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2108; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2109; AVX512F-NEXT:    ret{{[l|q]}}
2110;
2111; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2112; AVX512VL:       # %bb.0:
2113; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2114; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2115; AVX512VL-NEXT:    ret{{[l|q]}}
2116;
2117; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2118; AVX512DQ:       # %bb.0:
2119; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2120; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2121; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2122; AVX512DQ-NEXT:    ret{{[l|q]}}
2123;
2124; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2125; AVX512VLDQ:       # %bb.0:
2126; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2127; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2128; AVX512VLDQ-NEXT:    ret{{[l|q]}}
2129  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a,
2130                                              metadata !"fpexcept.strict") #0
2131  ret <2 x i8> %ret
2132}
2133
; Strict-FP signed conversion of <2 x float> to <2 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; All configurations are expected to zero the upper half of %xmm0 with
; (v)movq and convert with a 32-bit (v)cvttps2dq. The runs without AVX512VL
; then narrow in two steps with (v)packssdw and (v)packsswb, while the runs
; with AVX512VL narrow with a single vpmovdb.
2134define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
2135; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2136; SSE-32:       # %bb.0:
2137; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2138; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
2139; SSE-32-NEXT:    packssdw %xmm0, %xmm0
2140; SSE-32-NEXT:    packsswb %xmm0, %xmm0
2141; SSE-32-NEXT:    retl
2142;
2143; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2144; SSE-64:       # %bb.0:
2145; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2146; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
2147; SSE-64-NEXT:    packssdw %xmm0, %xmm0
2148; SSE-64-NEXT:    packsswb %xmm0, %xmm0
2149; SSE-64-NEXT:    retq
2150;
2151; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2152; AVX:       # %bb.0:
2153; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2154; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2155; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2156; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2157; AVX-NEXT:    ret{{[l|q]}}
2158;
2159; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2160; AVX512F:       # %bb.0:
2161; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2162; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2163; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2164; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2165; AVX512F-NEXT:    ret{{[l|q]}}
2166;
2167; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2168; AVX512VL:       # %bb.0:
2169; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2170; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2171; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2172; AVX512VL-NEXT:    ret{{[l|q]}}
2173;
2174; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2175; AVX512DQ:       # %bb.0:
2176; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2177; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2178; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2179; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2180; AVX512DQ-NEXT:    ret{{[l|q]}}
2181;
2182; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2183; AVX512VLDQ:       # %bb.0:
2184; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2185; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2186; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2187; AVX512VLDQ-NEXT:    ret{{[l|q]}}
2188  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
2189                                              metadata !"fpexcept.strict") #0
2190  ret <2 x i8> %ret
2191}
2192
; Codegen test for the constrained (strict-FP) unsigned conversion of
; <2 x float> to <2 x i8> under "fpexcept.strict".
; The expected-assembly lines below are autogenerated (see the note on the
; first line of the file) and must not be edited by hand. In every run
; configuration they expect:
;   1. (v)movq to keep lanes 0-1 of the input and zero the upper lanes,
;   2. (v)cvttps2dq to perform the truncating conversion to 32-bit integers,
;   3. a narrowing step: packuswb twice on the sse2 runs, vpackusdw followed by
;      vpackuswb on the avx / avx512f / avx512dq runs, and a single vpmovdb on
;      the runs that enable avx512vl.
; NOTE(review): zeroing the upper lanes presumably keeps the unused lanes from
; raising FP exceptions in strict mode - confirm against the lowering code.
; NOTE(review): attribute group #0 is defined outside this chunk (presumably
; strictfp) - confirm.
2193define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
2194; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2195; SSE-32:       # %bb.0:
2196; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2197; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
2198; SSE-32-NEXT:    packuswb %xmm0, %xmm0
2199; SSE-32-NEXT:    packuswb %xmm0, %xmm0
2200; SSE-32-NEXT:    retl
2201;
2202; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2203; SSE-64:       # %bb.0:
2204; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2205; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
2206; SSE-64-NEXT:    packuswb %xmm0, %xmm0
2207; SSE-64-NEXT:    packuswb %xmm0, %xmm0
2208; SSE-64-NEXT:    retq
2209;
2210; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2211; AVX:       # %bb.0:
2212; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2213; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2214; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2215; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2216; AVX-NEXT:    ret{{[l|q]}}
2217;
2218; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2219; AVX512F:       # %bb.0:
2220; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2221; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2222; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2223; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2224; AVX512F-NEXT:    ret{{[l|q]}}
2225;
2226; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2227; AVX512VL:       # %bb.0:
2228; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2229; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2230; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2231; AVX512VL-NEXT:    ret{{[l|q]}}
2232;
2233; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2234; AVX512DQ:       # %bb.0:
2235; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2236; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2237; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
2238; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
2239; AVX512DQ-NEXT:    ret{{[l|q]}}
2240;
2241; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2242; AVX512VLDQ:       # %bb.0:
2243; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2244; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2245; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2246; AVX512VLDQ-NEXT:    ret{{[l|q]}}
; IR under test: the constrained fptoui intrinsic with strict exception semantics.
2247  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
2248                                              metadata !"fpexcept.strict") #0
2249  ret <2 x i8> %ret
2250}
2251
; Codegen test for the constrained (strict-FP) signed conversion of
; <2 x double> to <2 x i1> under "fpexcept.strict".
; The expected-assembly lines below are autogenerated (see the note on the
; first line of the file) and must not be edited by hand. What they expect:
;   * i686 sse2 run: each lane is spilled to the stack and converted through
;     x87 (fldl + fistpll), with the x87 control word temporarily modified
;     (fnstcw / orl $0xC00 / fldcw) around each store; the two 64-bit results
;     are reloaded and combined in xmm0.
;   * i686 avx run: same stack round-trip but using fisttpll, so no control
;     word manipulation appears; the result is rebuilt with vmovd + vpinsrd.
;   * x86_64 sse2 / avx runs: one (v)cvttsd2si to %rax per lane, recombined with
;     (v)punpcklqdq.
;   * avx512 runs: a single packed vcvttpd2dq, then vpslld $31 to move bit 0
;     into the sign position, a vector-to-mask step (vptestmd, or vpmovd2m when
;     avx512dq is enabled), and a mask-to-vector expansion. Runs without
;     avx512vl use zmm registers for the mask steps and end with vzeroupper.
; NOTE(review): attribute group #0 is defined outside this chunk (presumably
; strictfp) - confirm.
2252define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
2253; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2254; SSE-32:       # %bb.0:
2255; SSE-32-NEXT:    pushl %ebp
2256; SSE-32-NEXT:    .cfi_def_cfa_offset 8
2257; SSE-32-NEXT:    .cfi_offset %ebp, -8
2258; SSE-32-NEXT:    movl %esp, %ebp
2259; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
2260; SSE-32-NEXT:    andl $-8, %esp
2261; SSE-32-NEXT:    subl $24, %esp
2262; SSE-32-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
2263; SSE-32-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
2264; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
2265; SSE-32-NEXT:    wait
2266; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
2267; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
2268; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
2269; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
2270; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2271; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2272; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2273; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
2274; SSE-32-NEXT:    wait
2275; SSE-32-NEXT:    fnstcw (%esp)
2276; SSE-32-NEXT:    movzwl (%esp), %eax
2277; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
2278; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
2279; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2280; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2281; SSE-32-NEXT:    fldcw (%esp)
2282; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2283; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2284; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2285; SSE-32-NEXT:    movl %ebp, %esp
2286; SSE-32-NEXT:    popl %ebp
2287; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
2288; SSE-32-NEXT:    retl
2289;
2290; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2291; SSE-64:       # %bb.0:
2292; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
2293; SSE-64-NEXT:    movq %rax, %xmm1
2294; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2295; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
2296; SSE-64-NEXT:    movq %rax, %xmm0
2297; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2298; SSE-64-NEXT:    movdqa %xmm1, %xmm0
2299; SSE-64-NEXT:    retq
2300;
2301; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2302; AVX-32:       # %bb.0:
2303; AVX-32-NEXT:    pushl %ebp
2304; AVX-32-NEXT:    .cfi_def_cfa_offset 8
2305; AVX-32-NEXT:    .cfi_offset %ebp, -8
2306; AVX-32-NEXT:    movl %esp, %ebp
2307; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
2308; AVX-32-NEXT:    andl $-8, %esp
2309; AVX-32-NEXT:    subl $16, %esp
2310; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
2311; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
2312; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
2313; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
2314; AVX-32-NEXT:    fldl (%esp)
2315; AVX-32-NEXT:    fisttpll (%esp)
2316; AVX-32-NEXT:    wait
2317; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2318; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
2319; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
2320; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
2321; AVX-32-NEXT:    movl %ebp, %esp
2322; AVX-32-NEXT:    popl %ebp
2323; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
2324; AVX-32-NEXT:    retl
2325;
2326; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2327; AVX-64:       # %bb.0:
2328; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
2329; AVX-64-NEXT:    vmovq %rax, %xmm1
2330; AVX-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
2331; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
2332; AVX-64-NEXT:    vmovq %rax, %xmm0
2333; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2334; AVX-64-NEXT:    retq
2335;
2336; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2337; AVX512F:       # %bb.0:
2338; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2339; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
2340; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
2341; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2342; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2343; AVX512F-NEXT:    vzeroupper
2344; AVX512F-NEXT:    ret{{[l|q]}}
2345;
2346; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2347; AVX512VL:       # %bb.0:
2348; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2349; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
2350; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
2351; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
2352; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2353; AVX512VL-NEXT:    ret{{[l|q]}}
2354;
2355; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2356; AVX512DQ:       # %bb.0:
2357; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2358; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
2359; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
2360; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2361; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2362; AVX512DQ-NEXT:    vzeroupper
2363; AVX512DQ-NEXT:    ret{{[l|q]}}
2364;
2365; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2366; AVX512VLDQ:       # %bb.0:
2367; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2368; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2369; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
2370; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
2371; AVX512VLDQ-NEXT:    ret{{[l|q]}}
; IR under test: the constrained fptosi intrinsic with strict exception semantics.
2372  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a,
2373                                              metadata !"fpexcept.strict") #0
2374  ret <2 x i1> %ret
2375}
2376
; Codegen test for the constrained (strict-FP) unsigned conversion of
; <2 x double> to <2 x i1> under "fpexcept.strict".
; The expected-assembly lines below are autogenerated (see the note on the
; first line of the file) and must not be edited by hand. What they expect:
;   * sse2 / avx runs (no avx512): each lane is compared against the constant
;     9.2233720368547758E+18 (2^63) with (v)comisd. A branch selects either
;     that constant or zero as the value to subtract before a signed 64-bit
;     conversion (x87 fistpll / fisttpll on i686, (v)cvttsd2si on x86_64).
;     setae, a shift by 31 (i686) or 63 (x86_64) and an xor then fold the
;     comparison result into the top bit of the converted value.
;   * avx512 runs: a packed vcvttpd2udq, then vpslld $31, a vector-to-mask
;     step (vptestmd, or vpmovd2m when avx512dq is enabled), and a
;     mask-to-vector expansion. Runs without avx512vl first copy xmm0 onto
;     itself with vmovaps, convert from zmm0 into ymm0, and end with
;     vzeroupper.
; NOTE(review): the vmovaps %xmm0, %xmm0 presumably clears the upper zmm lanes
; so they cannot raise FP exceptions - confirm against the lowering code.
; NOTE(review): attribute group #0 is defined outside this chunk (presumably
; strictfp) - confirm.
2377define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
2378; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2379; SSE-32:       # %bb.0:
2380; SSE-32-NEXT:    pushl %ebp
2381; SSE-32-NEXT:    .cfi_def_cfa_offset 8
2382; SSE-32-NEXT:    .cfi_offset %ebp, -8
2383; SSE-32-NEXT:    movl %esp, %ebp
2384; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
2385; SSE-32-NEXT:    andl $-8, %esp
2386; SSE-32-NEXT:    subl $24, %esp
2387; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
2388; SSE-32-NEXT:    comisd %xmm1, %xmm0
2389; SSE-32-NEXT:    movapd %xmm1, %xmm2
2390; SSE-32-NEXT:    jae .LBB19_2
2391; SSE-32-NEXT:  # %bb.1:
2392; SSE-32-NEXT:    xorpd %xmm2, %xmm2
2393; SSE-32-NEXT:  .LBB19_2:
2394; SSE-32-NEXT:    movapd %xmm0, %xmm3
2395; SSE-32-NEXT:    subsd %xmm2, %xmm3
2396; SSE-32-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
2397; SSE-32-NEXT:    setae %al
2398; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
2399; SSE-32-NEXT:    wait
2400; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
2401; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
2402; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
2403; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
2404; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2405; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2406; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2407; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2408; SSE-32-NEXT:    comisd %xmm1, %xmm0
2409; SSE-32-NEXT:    jae .LBB19_4
2410; SSE-32-NEXT:  # %bb.3:
2411; SSE-32-NEXT:    xorpd %xmm1, %xmm1
2412; SSE-32-NEXT:  .LBB19_4:
2413; SSE-32-NEXT:    subsd %xmm1, %xmm0
2414; SSE-32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
2415; SSE-32-NEXT:    setae %cl
2416; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
2417; SSE-32-NEXT:    wait
2418; SSE-32-NEXT:    fnstcw (%esp)
2419; SSE-32-NEXT:    movzwl (%esp), %edx
2420; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
2421; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
2422; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2423; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2424; SSE-32-NEXT:    fldcw (%esp)
2425; SSE-32-NEXT:    movzbl %al, %eax
2426; SSE-32-NEXT:    shll $31, %eax
2427; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2428; SSE-32-NEXT:    movd %eax, %xmm1
2429; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2430; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2431; SSE-32-NEXT:    movzbl %cl, %eax
2432; SSE-32-NEXT:    shll $31, %eax
2433; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2434; SSE-32-NEXT:    movd %eax, %xmm1
2435; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2436; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2437; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2438; SSE-32-NEXT:    movl %ebp, %esp
2439; SSE-32-NEXT:    popl %ebp
2440; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
2441; SSE-32-NEXT:    retl
2442;
2443; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2444; SSE-64:       # %bb.0:
2445; SSE-64-NEXT:    movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
2446; SSE-64-NEXT:    comisd %xmm3, %xmm0
2447; SSE-64-NEXT:    xorpd %xmm2, %xmm2
2448; SSE-64-NEXT:    xorpd %xmm1, %xmm1
2449; SSE-64-NEXT:    jb .LBB19_2
2450; SSE-64-NEXT:  # %bb.1:
2451; SSE-64-NEXT:    movapd %xmm3, %xmm1
2452; SSE-64-NEXT:  .LBB19_2:
2453; SSE-64-NEXT:    movapd %xmm0, %xmm4
2454; SSE-64-NEXT:    subsd %xmm1, %xmm4
2455; SSE-64-NEXT:    cvttsd2si %xmm4, %rax
2456; SSE-64-NEXT:    setae %cl
2457; SSE-64-NEXT:    movzbl %cl, %ecx
2458; SSE-64-NEXT:    shlq $63, %rcx
2459; SSE-64-NEXT:    xorq %rax, %rcx
2460; SSE-64-NEXT:    movq %rcx, %xmm1
2461; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2462; SSE-64-NEXT:    comisd %xmm3, %xmm0
2463; SSE-64-NEXT:    jb .LBB19_4
2464; SSE-64-NEXT:  # %bb.3:
2465; SSE-64-NEXT:    movapd %xmm3, %xmm2
2466; SSE-64-NEXT:  .LBB19_4:
2467; SSE-64-NEXT:    subsd %xmm2, %xmm0
2468; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
2469; SSE-64-NEXT:    setae %cl
2470; SSE-64-NEXT:    movzbl %cl, %ecx
2471; SSE-64-NEXT:    shlq $63, %rcx
2472; SSE-64-NEXT:    xorq %rax, %rcx
2473; SSE-64-NEXT:    movq %rcx, %xmm0
2474; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2475; SSE-64-NEXT:    movdqa %xmm1, %xmm0
2476; SSE-64-NEXT:    retq
2477;
2478; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2479; AVX-32:       # %bb.0:
2480; AVX-32-NEXT:    pushl %ebp
2481; AVX-32-NEXT:    .cfi_def_cfa_offset 8
2482; AVX-32-NEXT:    .cfi_offset %ebp, -8
2483; AVX-32-NEXT:    movl %esp, %ebp
2484; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
2485; AVX-32-NEXT:    andl $-8, %esp
2486; AVX-32-NEXT:    subl $16, %esp
2487; AVX-32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
2488; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
2489; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
2490; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
2491; AVX-32-NEXT:    jae .LBB19_2
2492; AVX-32-NEXT:  # %bb.1:
2493; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
2494; AVX-32-NEXT:  .LBB19_2:
2495; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
2496; AVX-32-NEXT:    vmovsd %xmm2, (%esp)
2497; AVX-32-NEXT:    fldl (%esp)
2498; AVX-32-NEXT:    fisttpll (%esp)
2499; AVX-32-NEXT:    wait
2500; AVX-32-NEXT:    setae %al
2501; AVX-32-NEXT:    movzbl %al, %eax
2502; AVX-32-NEXT:    shll $31, %eax
2503; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2504; AVX-32-NEXT:    vcomisd %xmm1, %xmm0
2505; AVX-32-NEXT:    jae .LBB19_4
2506; AVX-32-NEXT:  # %bb.3:
2507; AVX-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
2508; AVX-32-NEXT:  .LBB19_4:
2509; AVX-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
2510; AVX-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
2511; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
2512; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
2513; AVX-32-NEXT:    wait
2514; AVX-32-NEXT:    setae %cl
2515; AVX-32-NEXT:    movzbl %cl, %ecx
2516; AVX-32-NEXT:    shll $31, %ecx
2517; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
2518; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2519; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
2520; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
2521; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
2522; AVX-32-NEXT:    movl %ebp, %esp
2523; AVX-32-NEXT:    popl %ebp
2524; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
2525; AVX-32-NEXT:    retl
2526;
2527; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2528; AVX-64:       # %bb.0:
2529; AVX-64-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
2530; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
2531; AVX-64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
2532; AVX-64-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
2533; AVX-64-NEXT:    jb .LBB19_2
2534; AVX-64-NEXT:  # %bb.1:
2535; AVX-64-NEXT:    vmovapd %xmm1, %xmm3
2536; AVX-64-NEXT:  .LBB19_2:
2537; AVX-64-NEXT:    vsubsd %xmm3, %xmm0, %xmm3
2538; AVX-64-NEXT:    vcvttsd2si %xmm3, %rax
2539; AVX-64-NEXT:    setae %cl
2540; AVX-64-NEXT:    movzbl %cl, %ecx
2541; AVX-64-NEXT:    shlq $63, %rcx
2542; AVX-64-NEXT:    xorq %rax, %rcx
2543; AVX-64-NEXT:    vmovq %rcx, %xmm3
2544; AVX-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
2545; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
2546; AVX-64-NEXT:    jb .LBB19_4
2547; AVX-64-NEXT:  # %bb.3:
2548; AVX-64-NEXT:    vmovapd %xmm1, %xmm2
2549; AVX-64-NEXT:  .LBB19_4:
2550; AVX-64-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
2551; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
2552; AVX-64-NEXT:    setae %cl
2553; AVX-64-NEXT:    movzbl %cl, %ecx
2554; AVX-64-NEXT:    shlq $63, %rcx
2555; AVX-64-NEXT:    xorq %rax, %rcx
2556; AVX-64-NEXT:    vmovq %rcx, %xmm0
2557; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
2558; AVX-64-NEXT:    retq
2559;
2560; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2561; AVX512F:       # %bb.0:
2562; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
2563; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
2564; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
2565; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
2566; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2567; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2568; AVX512F-NEXT:    vzeroupper
2569; AVX512F-NEXT:    ret{{[l|q]}}
2570;
2571; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2572; AVX512VL:       # %bb.0:
2573; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
2574; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
2575; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
2576; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
2577; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2578; AVX512VL-NEXT:    ret{{[l|q]}}
2579;
2580; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2581; AVX512DQ:       # %bb.0:
2582; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
2583; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
2584; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
2585; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
2586; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2587; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2588; AVX512DQ-NEXT:    vzeroupper
2589; AVX512DQ-NEXT:    ret{{[l|q]}}
2590;
2591; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2592; AVX512VLDQ:       # %bb.0:
2593; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
2594; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2595; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
2596; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
2597; AVX512VLDQ-NEXT:    ret{{[l|q]}}
; IR under test: the constrained fptoui intrinsic with strict exception semantics.
2598  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a,
2599                                              metadata !"fpexcept.strict") #0
2600  ret <2 x i1> %ret
2601}
2602
; Codegen test for the constrained (strict-FP) signed conversion of
; <2 x float> to <2 x i1> under "fpexcept.strict".
; The expected-assembly lines below are autogenerated (see the note on the
; first line of the file) and must not be edited by hand. What they expect:
;   * i686 sse2 run: each float lane is stored to the stack and converted
;     through x87 (flds + fistpll), with the x87 control word temporarily
;     modified (fnstcw / orl $0xC00 / fldcw) around each store; the two 64-bit
;     results are reloaded and combined in xmm0.
;   * i686 avx run: same stack round-trip but using fisttpll, so no control
;     word manipulation appears; the result is rebuilt with vmovd + vpinsrd.
;   * x86_64 sse2 / avx runs: one (v)cvttss2si to %rax per lane, recombined with
;     (v)punpcklqdq.
;   * avx512 runs: each lane is converted separately with scalar vcvttss2si to
;     %eax (no packed conversion is used here), the two results are moved into
;     mask registers and merged with kshift / korw, and the mask is then
;     expanded to a vector. Runs without avx512vl expand through zmm0 and end
;     with vzeroupper.
; NOTE(review): attribute group #0 is defined outside this chunk (presumably
; strictfp) - confirm.
2603define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
2604; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2605; SSE-32:       # %bb.0:
2606; SSE-32-NEXT:    pushl %ebp
2607; SSE-32-NEXT:    .cfi_def_cfa_offset 8
2608; SSE-32-NEXT:    .cfi_offset %ebp, -8
2609; SSE-32-NEXT:    movl %esp, %ebp
2610; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
2611; SSE-32-NEXT:    andl $-8, %esp
2612; SSE-32-NEXT:    subl $24, %esp
2613; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2614; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2615; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2616; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
2617; SSE-32-NEXT:    wait
2618; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
2619; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
2620; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
2621; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
2622; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2623; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2624; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2625; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
2626; SSE-32-NEXT:    wait
2627; SSE-32-NEXT:    fnstcw (%esp)
2628; SSE-32-NEXT:    movzwl (%esp), %eax
2629; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
2630; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
2631; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2632; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2633; SSE-32-NEXT:    fldcw (%esp)
2634; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2635; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2636; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2637; SSE-32-NEXT:    movl %ebp, %esp
2638; SSE-32-NEXT:    popl %ebp
2639; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
2640; SSE-32-NEXT:    retl
2641;
2642; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2643; SSE-64:       # %bb.0:
2644; SSE-64-NEXT:    cvttss2si %xmm0, %rax
2645; SSE-64-NEXT:    movq %rax, %xmm1
2646; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2647; SSE-64-NEXT:    cvttss2si %xmm0, %rax
2648; SSE-64-NEXT:    movq %rax, %xmm0
2649; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2650; SSE-64-NEXT:    movdqa %xmm1, %xmm0
2651; SSE-64-NEXT:    retq
2652;
2653; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2654; AVX-32:       # %bb.0:
2655; AVX-32-NEXT:    pushl %ebp
2656; AVX-32-NEXT:    .cfi_def_cfa_offset 8
2657; AVX-32-NEXT:    .cfi_offset %ebp, -8
2658; AVX-32-NEXT:    movl %esp, %ebp
2659; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
2660; AVX-32-NEXT:    andl $-8, %esp
2661; AVX-32-NEXT:    subl $16, %esp
2662; AVX-32-NEXT:    vmovss %xmm0, (%esp)
2663; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
2664; AVX-32-NEXT:    flds (%esp)
2665; AVX-32-NEXT:    fisttpll (%esp)
2666; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
2667; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
2668; AVX-32-NEXT:    wait
2669; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2670; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
2671; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
2672; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
2673; AVX-32-NEXT:    movl %ebp, %esp
2674; AVX-32-NEXT:    popl %ebp
2675; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
2676; AVX-32-NEXT:    retl
2677;
2678; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2679; AVX-64:       # %bb.0:
2680; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
2681; AVX-64-NEXT:    vmovq %rax, %xmm1
2682; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2683; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
2684; AVX-64-NEXT:    vmovq %rax, %xmm0
2685; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2686; AVX-64-NEXT:    retq
2687;
2688; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2689; AVX512F:       # %bb.0:
2690; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
2691; AVX512F-NEXT:    andl $1, %eax
2692; AVX512F-NEXT:    kmovw %eax, %k0
2693; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2694; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
2695; AVX512F-NEXT:    kmovw %eax, %k1
2696; AVX512F-NEXT:    kshiftlw $1, %k1, %k1
2697; AVX512F-NEXT:    korw %k1, %k0, %k1
2698; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2699; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2700; AVX512F-NEXT:    vzeroupper
2701; AVX512F-NEXT:    ret{{[l|q]}}
2702;
2703; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2704; AVX512VL:       # %bb.0:
2705; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
2706; AVX512VL-NEXT:    andl $1, %eax
2707; AVX512VL-NEXT:    kmovw %eax, %k0
2708; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2709; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
2710; AVX512VL-NEXT:    kmovw %eax, %k1
2711; AVX512VL-NEXT:    kshiftlw $1, %k1, %k1
2712; AVX512VL-NEXT:    korw %k1, %k0, %k1
2713; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
2714; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2715; AVX512VL-NEXT:    ret{{[l|q]}}
2716;
2717; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2718; AVX512DQ:       # %bb.0:
2719; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2720; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
2721; AVX512DQ-NEXT:    kmovw %eax, %k0
2722; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
2723; AVX512DQ-NEXT:    vcvttss2si %xmm0, %eax
2724; AVX512DQ-NEXT:    kmovw %eax, %k1
2725; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
2726; AVX512DQ-NEXT:    kshiftrb $7, %k1, %k1
2727; AVX512DQ-NEXT:    korw %k0, %k1, %k0
2728; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2729; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2730; AVX512DQ-NEXT:    vzeroupper
2731; AVX512DQ-NEXT:    ret{{[l|q]}}
2732;
2733; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2734; AVX512VLDQ:       # %bb.0:
2735; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2736; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
2737; AVX512VLDQ-NEXT:    kmovw %eax, %k0
2738; AVX512VLDQ-NEXT:    kshiftlb $1, %k0, %k0
2739; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %eax
2740; AVX512VLDQ-NEXT:    kmovw %eax, %k1
2741; AVX512VLDQ-NEXT:    kshiftlb $7, %k1, %k1
2742; AVX512VLDQ-NEXT:    kshiftrb $7, %k1, %k1
2743; AVX512VLDQ-NEXT:    korw %k0, %k1, %k0
2744; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
2745; AVX512VLDQ-NEXT:    ret{{[l|q]}}
; IR under test: the constrained fptosi intrinsic with strict exception semantics.
2746  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a,
2747                                              metadata !"fpexcept.strict") #0
2748  ret <2 x i1> %ret
2749}
2750
2751define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
2752; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2753; SSE-32:       # %bb.0:
2754; SSE-32-NEXT:    pushl %ebp
2755; SSE-32-NEXT:    .cfi_def_cfa_offset 8
2756; SSE-32-NEXT:    .cfi_offset %ebp, -8
2757; SSE-32-NEXT:    movl %esp, %ebp
2758; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
2759; SSE-32-NEXT:    andl $-8, %esp
2760; SSE-32-NEXT:    subl $24, %esp
2761; SSE-32-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2762; SSE-32-NEXT:    comiss %xmm1, %xmm0
2763; SSE-32-NEXT:    movaps %xmm1, %xmm2
2764; SSE-32-NEXT:    jae .LBB21_2
2765; SSE-32-NEXT:  # %bb.1:
2766; SSE-32-NEXT:    xorps %xmm2, %xmm2
2767; SSE-32-NEXT:  .LBB21_2:
2768; SSE-32-NEXT:    movaps %xmm0, %xmm3
2769; SSE-32-NEXT:    subss %xmm2, %xmm3
2770; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
2771; SSE-32-NEXT:    setae %al
2772; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
2773; SSE-32-NEXT:    wait
2774; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
2775; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
2776; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
2777; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
2778; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2779; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2780; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2781; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2782; SSE-32-NEXT:    comiss %xmm1, %xmm0
2783; SSE-32-NEXT:    jae .LBB21_4
2784; SSE-32-NEXT:  # %bb.3:
2785; SSE-32-NEXT:    xorps %xmm1, %xmm1
2786; SSE-32-NEXT:  .LBB21_4:
2787; SSE-32-NEXT:    subss %xmm1, %xmm0
2788; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2789; SSE-32-NEXT:    setae %cl
2790; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
2791; SSE-32-NEXT:    wait
2792; SSE-32-NEXT:    fnstcw (%esp)
2793; SSE-32-NEXT:    movzwl (%esp), %edx
2794; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
2795; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
2796; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
2797; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
2798; SSE-32-NEXT:    fldcw (%esp)
2799; SSE-32-NEXT:    movzbl %al, %eax
2800; SSE-32-NEXT:    shll $31, %eax
2801; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2802; SSE-32-NEXT:    movd %eax, %xmm1
2803; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2804; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2805; SSE-32-NEXT:    movzbl %cl, %eax
2806; SSE-32-NEXT:    shll $31, %eax
2807; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2808; SSE-32-NEXT:    movd %eax, %xmm1
2809; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2810; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2811; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2812; SSE-32-NEXT:    movl %ebp, %esp
2813; SSE-32-NEXT:    popl %ebp
2814; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
2815; SSE-32-NEXT:    retl
2816;
2817; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2818; SSE-64:       # %bb.0:
2819; SSE-64-NEXT:    movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2820; SSE-64-NEXT:    comiss %xmm3, %xmm0
2821; SSE-64-NEXT:    xorps %xmm2, %xmm2
2822; SSE-64-NEXT:    xorps %xmm1, %xmm1
2823; SSE-64-NEXT:    jb .LBB21_2
2824; SSE-64-NEXT:  # %bb.1:
2825; SSE-64-NEXT:    movaps %xmm3, %xmm1
2826; SSE-64-NEXT:  .LBB21_2:
2827; SSE-64-NEXT:    movaps %xmm0, %xmm4
2828; SSE-64-NEXT:    subss %xmm1, %xmm4
2829; SSE-64-NEXT:    cvttss2si %xmm4, %rax
2830; SSE-64-NEXT:    setae %cl
2831; SSE-64-NEXT:    movzbl %cl, %ecx
2832; SSE-64-NEXT:    shlq $63, %rcx
2833; SSE-64-NEXT:    xorq %rax, %rcx
2834; SSE-64-NEXT:    movq %rcx, %xmm1
2835; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2836; SSE-64-NEXT:    comiss %xmm3, %xmm0
2837; SSE-64-NEXT:    jb .LBB21_4
2838; SSE-64-NEXT:  # %bb.3:
2839; SSE-64-NEXT:    movaps %xmm3, %xmm2
2840; SSE-64-NEXT:  .LBB21_4:
2841; SSE-64-NEXT:    subss %xmm2, %xmm0
2842; SSE-64-NEXT:    cvttss2si %xmm0, %rax
2843; SSE-64-NEXT:    setae %cl
2844; SSE-64-NEXT:    movzbl %cl, %ecx
2845; SSE-64-NEXT:    shlq $63, %rcx
2846; SSE-64-NEXT:    xorq %rax, %rcx
2847; SSE-64-NEXT:    movq %rcx, %xmm0
2848; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2849; SSE-64-NEXT:    movdqa %xmm1, %xmm0
2850; SSE-64-NEXT:    retq
2851;
2852; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2853; AVX-32:       # %bb.0:
2854; AVX-32-NEXT:    pushl %ebp
2855; AVX-32-NEXT:    .cfi_def_cfa_offset 8
2856; AVX-32-NEXT:    .cfi_offset %ebp, -8
2857; AVX-32-NEXT:    movl %esp, %ebp
2858; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
2859; AVX-32-NEXT:    andl $-8, %esp
2860; AVX-32-NEXT:    subl $16, %esp
2861; AVX-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
2862; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2863; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
2864; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
2865; AVX-32-NEXT:    jae .LBB21_2
2866; AVX-32-NEXT:  # %bb.1:
2867; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2868; AVX-32-NEXT:  .LBB21_2:
2869; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
2870; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
2871; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
2872; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
2873; AVX-32-NEXT:    wait
2874; AVX-32-NEXT:    setae %al
2875; AVX-32-NEXT:    movzbl %al, %eax
2876; AVX-32-NEXT:    shll $31, %eax
2877; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
2878; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
2879; AVX-32-NEXT:    jae .LBB21_4
2880; AVX-32-NEXT:  # %bb.3:
2881; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2882; AVX-32-NEXT:  .LBB21_4:
2883; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
2884; AVX-32-NEXT:    vmovss %xmm0, (%esp)
2885; AVX-32-NEXT:    flds (%esp)
2886; AVX-32-NEXT:    fisttpll (%esp)
2887; AVX-32-NEXT:    wait
2888; AVX-32-NEXT:    setae %cl
2889; AVX-32-NEXT:    movzbl %cl, %ecx
2890; AVX-32-NEXT:    shll $31, %ecx
2891; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
2892; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2893; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
2894; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
2895; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
2896; AVX-32-NEXT:    movl %ebp, %esp
2897; AVX-32-NEXT:    popl %ebp
2898; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
2899; AVX-32-NEXT:    retl
2900;
2901; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2902; AVX-64:       # %bb.0:
2903; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
2904; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
2905; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
2906; AVX-64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2907; AVX-64-NEXT:    jb .LBB21_2
2908; AVX-64-NEXT:  # %bb.1:
2909; AVX-64-NEXT:    vmovaps %xmm1, %xmm3
2910; AVX-64-NEXT:  .LBB21_2:
2911; AVX-64-NEXT:    vsubss %xmm3, %xmm0, %xmm3
2912; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
2913; AVX-64-NEXT:    setae %cl
2914; AVX-64-NEXT:    movzbl %cl, %ecx
2915; AVX-64-NEXT:    shlq $63, %rcx
2916; AVX-64-NEXT:    xorq %rax, %rcx
2917; AVX-64-NEXT:    vmovq %rcx, %xmm3
2918; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2919; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
2920; AVX-64-NEXT:    jb .LBB21_4
2921; AVX-64-NEXT:  # %bb.3:
2922; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
2923; AVX-64-NEXT:  .LBB21_4:
2924; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
2925; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
2926; AVX-64-NEXT:    setae %cl
2927; AVX-64-NEXT:    movzbl %cl, %ecx
2928; AVX-64-NEXT:    shlq $63, %rcx
2929; AVX-64-NEXT:    xorq %rax, %rcx
2930; AVX-64-NEXT:    vmovq %rcx, %xmm0
2931; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
2932; AVX-64-NEXT:    retq
2933;
2934; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2935; AVX512F:       # %bb.0:
2936; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
2937; AVX512F-NEXT:    andl $1, %eax
2938; AVX512F-NEXT:    kmovw %eax, %k0
2939; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2940; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
2941; AVX512F-NEXT:    kmovw %eax, %k1
2942; AVX512F-NEXT:    kshiftlw $1, %k1, %k1
2943; AVX512F-NEXT:    korw %k1, %k0, %k1
2944; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2945; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2946; AVX512F-NEXT:    vzeroupper
2947; AVX512F-NEXT:    ret{{[l|q]}}
2948;
2949; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2950; AVX512VL:       # %bb.0:
2951; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
2952; AVX512VL-NEXT:    andl $1, %eax
2953; AVX512VL-NEXT:    kmovw %eax, %k0
2954; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2955; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
2956; AVX512VL-NEXT:    kmovw %eax, %k1
2957; AVX512VL-NEXT:    kshiftlw $1, %k1, %k1
2958; AVX512VL-NEXT:    korw %k1, %k0, %k1
2959; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
2960; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2961; AVX512VL-NEXT:    ret{{[l|q]}}
2962;
2963; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2964; AVX512DQ:       # %bb.0:
2965; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2966; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
2967; AVX512DQ-NEXT:    kmovw %eax, %k0
2968; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
2969; AVX512DQ-NEXT:    vcvttss2si %xmm0, %eax
2970; AVX512DQ-NEXT:    kmovw %eax, %k1
2971; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
2972; AVX512DQ-NEXT:    kshiftrb $7, %k1, %k1
2973; AVX512DQ-NEXT:    korw %k0, %k1, %k0
2974; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2975; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2976; AVX512DQ-NEXT:    vzeroupper
2977; AVX512DQ-NEXT:    ret{{[l|q]}}
2978;
2979; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2980; AVX512VLDQ:       # %bb.0:
2981; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2982; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
2983; AVX512VLDQ-NEXT:    kmovw %eax, %k0
2984; AVX512VLDQ-NEXT:    kshiftlb $1, %k0, %k0
2985; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %eax
2986; AVX512VLDQ-NEXT:    kmovw %eax, %k1
2987; AVX512VLDQ-NEXT:    kshiftlb $7, %k1, %k1
2988; AVX512VLDQ-NEXT:    kshiftrb $7, %k1, %k1
2989; AVX512VLDQ-NEXT:    korw %k0, %k1, %k0
2990; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
2991; AVX512VLDQ-NEXT:    ret{{[l|q]}}
2992  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %a,
2993                                              metadata !"fpexcept.strict") #0
2994  ret <2 x i1> %ret
2995}
2996
; Strict-FP signed conversion of <4 x float> to <4 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every run configuration checked below expects the whole conversion to be a
; single packed truncating convert (cvttps2dq, or its VEX form vcvttps2dq)
; applied in place to xmm0, followed directly by the return.
2997define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
2998; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
2999; SSE-32:       # %bb.0:
3000; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3001; SSE-32-NEXT:    retl
3002;
3003; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3004; SSE-64:       # %bb.0:
3005; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3006; SSE-64-NEXT:    retq
3007;
3008; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3009; AVX:       # %bb.0:
3010; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3011; AVX-NEXT:    ret{{[l|q]}}
3012;
3013; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3014; AVX512F:       # %bb.0:
3015; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
3016; AVX512F-NEXT:    ret{{[l|q]}}
3017;
3018; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3019; AVX512VL:       # %bb.0:
3020; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
3021; AVX512VL-NEXT:    ret{{[l|q]}}
3022;
3023; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3024; AVX512DQ:       # %bb.0:
3025; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3026; AVX512DQ-NEXT:    ret{{[l|q]}}
3027;
3028; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3029; AVX512VLDQ:       # %bb.0:
3030; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3031; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3032  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
3033                                              metadata !"fpexcept.strict") #0
3034  ret <4 x i32> %ret
3035}
3036
; Strict-FP unsigned conversion of <4 x float> to <4 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
;
; Expected code shape per configuration, as pinned by the checks below.
;  * SSE2 and AVX (both 32- and 64-bit) have no unsigned convert in the
;    expected output. The sequence compares each lane against a splat of
;    2.14748365E+9, subtracts that constant only from lanes that are not
;    below it (the other lanes subtract zero), converts with the signed
;    cvttps2dq, and finally xors in a per-lane mask that is zero for lanes
;    below the threshold. On SSE the mask source is a constant-pool entry;
;    on AVX it is a splat of 2147483648 selected through vblendvps.
;  * AVX512 configurations without avx512vl use vcvttps2udq on the zmm
;    register: a self-move of xmm0 precedes it, and vzeroupper follows.
;  * AVX512 configurations with avx512vl use a single xmm vcvttps2udq.
3037define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
3038; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3039; SSE-32:       # %bb.0:
3040; SSE-32-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3041; SSE-32-NEXT:    movaps %xmm0, %xmm2
3042; SSE-32-NEXT:    cmpltps %xmm1, %xmm2
3043; SSE-32-NEXT:    movaps %xmm2, %xmm3
3044; SSE-32-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
3045; SSE-32-NEXT:    andnps %xmm1, %xmm2
3046; SSE-32-NEXT:    subps %xmm2, %xmm0
3047; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3048; SSE-32-NEXT:    xorps %xmm3, %xmm0
3049; SSE-32-NEXT:    retl
3050;
3051; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3052; SSE-64:       # %bb.0:
3053; SSE-64-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3054; SSE-64-NEXT:    movaps %xmm0, %xmm2
3055; SSE-64-NEXT:    cmpltps %xmm1, %xmm2
3056; SSE-64-NEXT:    movaps %xmm2, %xmm3
3057; SSE-64-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
3058; SSE-64-NEXT:    andnps %xmm1, %xmm2
3059; SSE-64-NEXT:    subps %xmm2, %xmm0
3060; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3061; SSE-64-NEXT:    xorps %xmm3, %xmm0
3062; SSE-64-NEXT:    retq
3063;
3064; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3065; AVX:       # %bb.0:
3066; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3067; AVX-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
3068; AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
3069; AVX-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
3070; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm4
3071; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm1, %xmm1
3072; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
3073; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3074; AVX-NEXT:    vxorps %xmm4, %xmm0, %xmm0
3075; AVX-NEXT:    ret{{[l|q]}}
3076;
3077; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3078; AVX512F:       # %bb.0:
3079; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
3080; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
3081; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3082; AVX512F-NEXT:    vzeroupper
3083; AVX512F-NEXT:    ret{{[l|q]}}
3084;
3085; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3086; AVX512VL:       # %bb.0:
3087; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
3088; AVX512VL-NEXT:    ret{{[l|q]}}
3089;
3090; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3091; AVX512DQ:       # %bb.0:
3092; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
3093; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
3094; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3095; AVX512DQ-NEXT:    vzeroupper
3096; AVX512DQ-NEXT:    ret{{[l|q]}}
3097;
3098; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3099; AVX512VLDQ:       # %bb.0:
3100; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
3101; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3102  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
3103                                              metadata !"fpexcept.strict") #0
3104  ret <4 x i32> %ret
3105}
3106
; Strict-FP signed conversion of <4 x float> to <4 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
;
; All configurations first convert with (v)cvttps2dq and then narrow the
; 32-bit lanes to bytes. The narrowing step is what differs in the checks.
;  * SSE2, AVX, and the AVX512 configurations without avx512vl narrow in two
;    steps using packssdw followed by packsswb (VEX forms on AVX and later).
;  * The two configurations with avx512vl narrow in one step using vpmovdb.
3107define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
3108; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3109; SSE-32:       # %bb.0:
3110; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3111; SSE-32-NEXT:    packssdw %xmm0, %xmm0
3112; SSE-32-NEXT:    packsswb %xmm0, %xmm0
3113; SSE-32-NEXT:    retl
3114;
3115; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3116; SSE-64:       # %bb.0:
3117; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3118; SSE-64-NEXT:    packssdw %xmm0, %xmm0
3119; SSE-64-NEXT:    packsswb %xmm0, %xmm0
3120; SSE-64-NEXT:    retq
3121;
3122; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3123; AVX:       # %bb.0:
3124; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3125; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
3126; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3127; AVX-NEXT:    ret{{[l|q]}}
3128;
3129; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3130; AVX512F:       # %bb.0:
3131; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
3132; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
3133; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3134; AVX512F-NEXT:    ret{{[l|q]}}
3135;
3136; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3137; AVX512VL:       # %bb.0:
3138; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
3139; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
3140; AVX512VL-NEXT:    ret{{[l|q]}}
3141;
3142; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3143; AVX512DQ:       # %bb.0:
3144; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3145; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
3146; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3147; AVX512DQ-NEXT:    ret{{[l|q]}}
3148;
3149; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3150; AVX512VLDQ:       # %bb.0:
3151; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3152; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
3153; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3154  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
3155                                              metadata !"fpexcept.strict") #0
3156  ret <4 x i8> %ret
3157}
3158
; Strict-FP unsigned conversion of <4 x float> to <4 x i8> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
;
; All configurations are expected to use the signed (v)cvttps2dq for the
; conversion itself and then narrow the 32-bit lanes to bytes.
;  * SSE2 narrows with packuswb applied twice.
;  * AVX and the AVX512 configurations without avx512vl narrow with
;    vpackusdw followed by vpackuswb.
;  * The two configurations with avx512vl narrow in one step using vpmovdb.
3159define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
3160; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3161; SSE-32:       # %bb.0:
3162; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3163; SSE-32-NEXT:    packuswb %xmm0, %xmm0
3164; SSE-32-NEXT:    packuswb %xmm0, %xmm0
3165; SSE-32-NEXT:    retl
3166;
3167; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3168; SSE-64:       # %bb.0:
3169; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3170; SSE-64-NEXT:    packuswb %xmm0, %xmm0
3171; SSE-64-NEXT:    packuswb %xmm0, %xmm0
3172; SSE-64-NEXT:    retq
3173;
3174; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3175; AVX:       # %bb.0:
3176; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3177; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
3178; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
3179; AVX-NEXT:    ret{{[l|q]}}
3180;
3181; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3182; AVX512F:       # %bb.0:
3183; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
3184; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
3185; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
3186; AVX512F-NEXT:    ret{{[l|q]}}
3187;
3188; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3189; AVX512VL:       # %bb.0:
3190; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
3191; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
3192; AVX512VL-NEXT:    ret{{[l|q]}}
3193;
3194; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3195; AVX512DQ:       # %bb.0:
3196; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3197; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
3198; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
3199; AVX512DQ-NEXT:    ret{{[l|q]}}
3200;
3201; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3202; AVX512VLDQ:       # %bb.0:
3203; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3204; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
3205; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3206  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
3207                                              metadata !"fpexcept.strict") #0
3208  ret <4 x i8> %ret
3209}
3210
; Strict-FP signed conversion of <4 x float> to <4 x i1> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
;
; Expected code shape per configuration, as pinned by the checks below.
;  * SSE2 and AVX return the (v)cvttps2dq result in xmm0 with no further
;    instructions.
;  * AVX512F converts, forms a mask with vptestmd on zmm0, and rebuilds the
;    vector with a zero-masked vpternlogd $255; vzeroupper precedes the return.
;  * AVX512VL does the same on xmm registers, using vpcmpeqd plus a
;    zero-masked vmovdqa32 to rebuild the vector.
;  * The avx512dq configurations round-trip through a mask register with
;    vpmovd2m / vpmovm2d (zmm without avx512vl, xmm with it).
3211define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
3212; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3213; SSE-32:       # %bb.0:
3214; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3215; SSE-32-NEXT:    retl
3216;
3217; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3218; SSE-64:       # %bb.0:
3219; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3220; SSE-64-NEXT:    retq
3221;
3222; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3223; AVX:       # %bb.0:
3224; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3225; AVX-NEXT:    ret{{[l|q]}}
3226;
3227; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3228; AVX512F:       # %bb.0:
3229; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
3230; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
3231; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
3232; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3233; AVX512F-NEXT:    vzeroupper
3234; AVX512F-NEXT:    ret{{[l|q]}}
3235;
3236; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3237; AVX512VL:       # %bb.0:
3238; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
3239; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
3240; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
3241; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
3242; AVX512VL-NEXT:    ret{{[l|q]}}
3243;
3244; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3245; AVX512DQ:       # %bb.0:
3246; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3247; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
3248; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
3249; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3250; AVX512DQ-NEXT:    vzeroupper
3251; AVX512DQ-NEXT:    ret{{[l|q]}}
3252;
3253; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3254; AVX512VLDQ:       # %bb.0:
3255; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3256; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
3257; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
3258; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3259  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
3260                                              metadata !"fpexcept.strict") #0
3261  ret <4 x i1> %ret
3262}
3263
; Strict-FP unsigned conversion of <4 x float> to <4 x i1> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
;
; Expected code shape per configuration, as pinned by the checks below.
;  * SSE2 and AVX return the (v)cvttps2dq result in xmm0 with no further
;    instructions.
;  * Every AVX512 configuration converts with vcvttps2dq and then shifts each
;    lane left by 31 (vpslld $31) before forming the mask, so only bit 0 of
;    each converted lane reaches the mask-forming instruction.
;  * Mask formation and vector rebuild then follow the feature set, using
;    vptestmd + zero-masked vpternlogd (avx512f), vptestmd + vpcmpeqd +
;    zero-masked vmovdqa32 (avx512vl), or vpmovd2m / vpmovm2d (avx512dq, on
;    zmm without avx512vl and on xmm with it).
3264define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
3265; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3266; SSE-32:       # %bb.0:
3267; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
3268; SSE-32-NEXT:    retl
3269;
3270; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3271; SSE-64:       # %bb.0:
3272; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
3273; SSE-64-NEXT:    retq
3274;
3275; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3276; AVX:       # %bb.0:
3277; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
3278; AVX-NEXT:    ret{{[l|q]}}
3279;
3280; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3281; AVX512F:       # %bb.0:
3282; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
3283; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
3284; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
3285; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
3286; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3287; AVX512F-NEXT:    vzeroupper
3288; AVX512F-NEXT:    ret{{[l|q]}}
3289;
3290; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3291; AVX512VL:       # %bb.0:
3292; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
3293; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
3294; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
3295; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
3296; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
3297; AVX512VL-NEXT:    ret{{[l|q]}}
3298;
3299; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3300; AVX512DQ:       # %bb.0:
3301; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3302; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
3303; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
3304; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
3305; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
3306; AVX512DQ-NEXT:    vzeroupper
3307; AVX512DQ-NEXT:    ret{{[l|q]}}
3308;
3309; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3310; AVX512VLDQ:       # %bb.0:
3311; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
3312; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
3313; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
3314; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
3315; AVX512VLDQ-NEXT:    ret{{[l|q]}}
3316  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
3317                                              metadata !"fpexcept.strict") #0
3318  ret <4 x i1> %ret
3319}
3320
; Attribute group #0 marks its users as strictfp. The test functions in this
; file and their constrained-intrinsic call sites reference it via "#0".
3321attributes #0 = { strictfp }
3322