; xref: /llvm-project/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ

declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata)

declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float>, metadata)
declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float>, metadata)

; Strict (exception-preserving) fptosi of <8 x double> to <8 x i64>.
; DQ targets lower this to a single vcvttpd2qq; VL-only targets scalarize
; (x87 fisttpll on i686, vcvttsd2si on x86-64).
define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $64, %esp
; AVX512VL-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
; AVX512VL-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm1, (%esp)
; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-64-NEXT:    vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-64-NEXT:    vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512VL-64-NEXT:    vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

; Strict fptoui of <8 x double> to <8 x i64>. Without DQ the unsigned
; conversion is emulated: compare against 2^63, conditionally subtract it,
; truncate, then restore the high bit via shll $31 + xorl.
define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    pushl %ebx
; AVX512VL-32-NEXT:    pushl %edi
; AVX512VL-32-NEXT:    pushl %esi
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $80, %esp
; AVX512VL-32-NEXT:    .cfi_offset %esi, -20
; AVX512VL-32-NEXT:    .cfi_offset %edi, -16
; AVX512VL-32-NEXT:    .cfi_offset %ebx, -12
; AVX512VL-32-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    movl %eax, %esi
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovsd %xmm3, (%esp)
; AVX512VL-32-NEXT:    xorl %ebx, %ebx
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %bl
; AVX512VL-32-NEXT:    kmovw %ebx, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %edx, %edx
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %dl
; AVX512VL-32-NEXT:    kmovw %edx, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %ecx, %ecx
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %cl
; AVX512VL-32-NEXT:    kmovw %ecx, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    movl %eax, %edi
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    shll $31, %ebx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
; AVX512VL-32-NEXT:    shll $31, %esi
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %esi
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
; AVX512VL-32-NEXT:    shll $31, %edx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; AVX512VL-32-NEXT:    shll $31, %edx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX512VL-32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm2
; AVX512VL-32-NEXT:    shll $31, %eax
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
; AVX512VL-32-NEXT:    shll $31, %edi
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edi
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT:    vpinsrd $3, %edi, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm1
; AVX512VL-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-32-NEXT:    leal -12(%ebp), %esp
; AVX512VL-32-NEXT:    popl %esi
; AVX512VL-32-NEXT:    popl %edi
; AVX512VL-32-NEXT:    popl %ebx
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

; Strict fptosi of <8 x float> to <8 x i64>. DQ targets use vcvttps2qq;
; VL-only targets scalarize (x87 fisttpll on i686, vcvttss2si on x86-64).
define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $64, %esp
; AVX512VL-32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-32-NEXT:    vmovd %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $1, %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $2, %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $3, %xmm1, (%esp)
; AVX512VL-32-NEXT:    vmovd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT:    vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT:    vcvttss2si %xmm3, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttss2si %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2si %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT:    vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttss2si %xmm3, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

; Strict fptoui of <8 x float> to <8 x i64>. Without DQ the unsigned
; conversion is emulated per element: compare against 2^63, conditionally
; subtract it, truncate, then restore the high bit via shll $31 + xorl.
define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    pushl %ebx
; AVX512VL-32-NEXT:    pushl %edi
; AVX512VL-32-NEXT:    pushl %esi
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $80, %esp
; AVX512VL-32-NEXT:    .cfi_offset %esi, -20
; AVX512VL-32-NEXT:    .cfi_offset %edi, -16
; AVX512VL-32-NEXT:    .cfi_offset %ebx, -12
; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT:    vshufps {{.*#+}} xmm3 = xmm2[3,3,3,3]
; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    movl %eax, %esi
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovss %xmm3, (%esp)
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT:    xorl %ebx, %ebx
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %bl
; AVX512VL-32-NEXT:    kmovw %ebx, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %edx, %edx
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %dl
; AVX512VL-32-NEXT:    kmovw %edx, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT:    xorl %ecx, %ecx
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %cl
; AVX512VL-32-NEXT:    kmovw %ecx, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    movl %eax, %edi
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovss %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    shll $31, %ebx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
; AVX512VL-32-NEXT:    shll $31, %esi
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %esi
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
; AVX512VL-32-NEXT:    shll $31, %edx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; AVX512VL-32-NEXT:    shll $31, %edx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT:    vpinsrd $3, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX512VL-32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm2
; AVX512VL-32-NEXT:    shll $31, %eax
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
; AVX512VL-32-NEXT:    shll $31, %edi
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edi
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT:    vpinsrd $3, %edi, %xmm3, %xmm3
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-32-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm1
; AVX512VL-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-32-NEXT:    leal -12(%ebp), %esp
; AVX512VL-32-NEXT:    popl %esi
; AVX512VL-32-NEXT:    popl %edi
; AVX512VL-32-NEXT:    popl %ebx
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm3, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm1, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm3, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

; Strict fptosi <8 x double> -> <8 x i32>: all run lines share one lowering,
; a single vcvttpd2dq on the zmm source (common CHECK prefix).
define <8 x i32> @strict_vector_fptosi_v8f64_to_v8i32(<8 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f64_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}
626
; Strict fptoui <8 x double> -> <8 x i32>: same shape as the signed case but
; selects the unsigned conversion vcvttpd2udq (common CHECK prefix).
define <8 x i32> @strict_vector_fptoui_v8f64_to_v8i32(<8 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v8f64_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}
636
; Strict fptosi <8 x double> -> <8 x i16>: converts to i32 (vcvttpd2dq) then
; truncates with vpmovdw.  AVX512VL truncates ymm->xmm directly; the AVX512DQ
; (no VL) run truncates zmm->ymm and keeps only the low xmm (kill note).
define <8 x i16> @strict_vector_fptosi_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}
656
; Strict fptoui <8 x double> -> <8 x i16>: lowered identically to the signed
; v8i16 case (vcvttpd2dq + vpmovdw) — the i16 result only keeps the low bits,
; so the signed i32 conversion is reused for the unsigned narrow type.
define <8 x i16> @strict_vector_fptoui_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}
676
; Strict fptosi <8 x double> -> <8 x i8>: convert to i32 then truncate with
; vpmovdb.  AVX512VL uses the ymm->xmm form; AVX512DQ the zmm->xmm form.
define <8 x i8> @strict_vector_fptosi_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}
695
; Strict fptoui <8 x double> -> <8 x i8>: identical lowering to the signed
; v8i8 case — only the low 8 bits survive the vpmovdb truncate, so the signed
; i32 conversion covers the unsigned narrow result too.
define <8 x i8> @strict_vector_fptoui_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}
714
; Strict fptosi <8 x double> -> <8 x i1>: converts to i32, forms a k-mask
; (vptestmd on VL, vpmovd2m on DQ), then re-expands the mask to the <8 x i16>
; in-register representation of <8 x i1>.
define <8 x i1> @strict_vector_fptosi_v8f64_to_v8i1(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}
739
; Strict fptoui <8 x double> -> <8 x i1>: like the signed v8i1 case, but an
; extra vpslld $31 first isolates bit 0 of each converted element before the
; mask is formed, so only the low bit of the integer result matters.
define <8 x i1> @strict_vector_fptoui_v8f64_to_v8i1(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}
766
; Strict fptosi <16 x float> -> <16 x i32>: one full-width vcvttps2dq on all
; configurations (common CHECK prefix).
define <16 x i32> @strict_vector_fptosi_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}
776
; Strict fptoui <16 x float> -> <16 x i32>: one full-width vcvttps2udq on all
; configurations (common CHECK prefix).
define <16 x i32> @strict_vector_fptoui_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}
786
; Strict fptosi <16 x float> -> <16 x i16>: vcvttps2dq followed by a vpmovdw
; truncate; identical on all configurations.
define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}
797
; Strict fptoui <16 x float> -> <16 x i16>: unsigned vcvttps2udq followed by
; the same vpmovdw truncate; identical on all configurations.
define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}
808
; Strict fptosi <16 x float> -> <16 x i8>: vcvttps2dq then a vpmovdb truncate
; down to xmm; identical on all configurations.
define <16 x i8> @strict_vector_fptosi_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}
820
; Strict fptoui <16 x float> -> <16 x i8>: lowered identically to the signed
; v16i8 case — the vpmovdb truncate keeps only the low 8 bits, so the signed
; i32 conversion is reused for the unsigned narrow result.
define <16 x i8> @strict_vector_fptoui_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}
832
; Strict fptosi <16 x float> -> <16 x i1>: converts to i32, forms a k-mask
; (vptestmd + vpternlogd all-ones on VL, vpmovd2m/vpmovm2d on DQ), then
; narrows to the <16 x i8> in-register representation of <16 x i1>.
define <16 x i1> @strict_vector_fptosi_v16f32_to_v16i1(<16 x float> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}
855
; Strict fptoui <16 x float> -> <16 x i1>: like the signed v16i1 case, but an
; extra vpslld $31 first isolates bit 0 of each converted element before the
; mask is formed, so only the low bit of the integer result matters.
define <16 x i1> @strict_vector_fptoui_v16f32_to_v16i1(<16 x float> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512VL-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}
880
881
; Attribute group #0 marks every test function (and each constrained-intrinsic
; call site) strictfp, as required for the constrained FP intrinsics above.
attributes #0 = { strictfp }
883