; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2    | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1  | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx     | FileCheck %s --check-prefixes=AVX,X64-AVX1
; RUN: llc < %s -mtriple=i686--   -mattr=+avx     | FileCheck %s --check-prefixes=AVX,X86-AVX1
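;
; Note: a single configuration can be checked by hand by running the matching
; RUN line directly, e.g. (with this file saved as ftrunc.ll):
;   llc < ftrunc.ll -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck ftrunc.ll --check-prefixes=SSE,SSE41
; The CHECK lines below are maintained by utils/update_llc_test_checks.py and
; should be regenerated with that script rather than edited manually.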

declare i32 @llvm.fptoui.sat.i32.f32(float)
declare i64 @llvm.fptosi.sat.i64.f64(double)

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    movl %eax, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_unsigned_f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_unsigned_f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    retl
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    sarq $63, %rcx
; SSE2-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    addsd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_unsigned_f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_unsigned_f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    retl
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    addps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_unsigned_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
; SSE2-NEXT:    movapd %xmm0, %xmm1
; SSE2-NEXT:    subsd %xmm2, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    subsd %xmm2, %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_unsigned_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
; SSE2-NEXT:    subsd %xmm3, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    subsd %xmm3, %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm2
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm2
; SSE2-NEXT:    addpd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_unsigned_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0 to worry about (no-signed-zeros-fp-math), it is ok to use roundss if it is available.
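; For example, with x = -0.5 the exact IR semantics give
; sitofp(fptosi(-0.5)) = sitofp(0) = +0.0, while roundss/trunc(-0.5) = -0.0,
; so the single-instruction lowering is only used when -0.0 can be ignored.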

define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed_f32_nsz:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed_f32_nsz:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX1-NEXT:    .cfi_offset %ebp, -8
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed32_f64_nsz:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed32_f64_nsz:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX1-NEXT:    .cfi_offset %ebp, -8
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X86-AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; X64-AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX1-NEXT:    .cfi_offset %ebp, -8
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $24, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    fisttpll (%esp)
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed_f64_nsz:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed_f64_nsz:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $8, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    movl %ebp, %esp
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_signed_v4f32_nsz:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_signed_v2f64_nsz:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rsi
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rsi, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_signed_v4f64_nsz:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    ret{{[l|q]}}
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The FTRUNC ("round**" x86 asm) fold relies on the fact that an overflowing
; float->int conversion is undefined behavior (poison). This fold used to be
; guarded with an attribute check, which allowed existing code to keep working
; under its assumption that float->int overflow had saturating behavior.
;
; Now, a front-end is expected to use the saturating fptosi/fptoui intrinsics
; if it wants to avoid this transform.
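; For example, @llvm.fptoui.sat.i32.f32(float 5.0e9) is defined to return
; 4294967295 (UINT32_MAX), whereas a plain 'fptoui float ... to i32' of an
; out-of-range value is poison, which is what permits the round-trip above to
; be folded into a single round** instruction.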

define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #0 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovael %eax, %ecx
; SSE-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovbel %ecx, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttss2si %xmm0, %rax
; X64-AVX1-NEXT:    xorl %ecx, %ecx
; X64-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vucomiss %xmm1, %xmm0
; X64-AVX1-NEXT:    cmovael %eax, %ecx
; X64-AVX1-NEXT:    vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX1-NEXT:    movl $-1, %eax
; X64-AVX1-NEXT:    cmovbel %ecx, %eax
; X64-AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvttss2si %xmm0, %eax
; X86-AVX1-NEXT:    movl %eax, %ecx
; X86-AVX1-NEXT:    sarl $31, %ecx
; X86-AVX1-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X86-AVX1-NEXT:    vcvttss2si %xmm1, %edx
; X86-AVX1-NEXT:    andl %ecx, %edx
; X86-AVX1-NEXT:    orl %eax, %edx
; X86-AVX1-NEXT:    xorl %eax, %eax
; X86-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vucomiss %xmm1, %xmm0
; X86-AVX1-NEXT:    cmovael %edx, %eax
; X86-AVX1-NEXT:    vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-AVX1-NEXT:    movl $-1, %ecx
; X86-AVX1-NEXT:    cmovbel %eax, %ecx
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0
; X86-AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX1-NEXT:    flds (%esp)
; X86-AVX1-NEXT:    popl %eax
; X86-AVX1-NEXT:    retl
  %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_intrinsic(double %x) #0 {
; SSE-LABEL: trunc_signed_f64_disable_via_intrinsic:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT:    cmovbeq %rax, %rcx
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm0, %xmm0
; SSE-NEXT:    cmovnpq %rcx, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; X64-AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX1-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64-AVX1-NEXT:    cmovbeq %rax, %rcx
; X64-AVX1-NEXT:    xorl %eax, %eax
; X64-AVX1-NEXT:    vucomisd %xmm0, %xmm0
; X64-AVX1-NEXT:    cmovnpq %rcx, %rax
; X64-AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; X64-AVX1-NEXT:    retq
;
; X86-AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp
; X86-AVX1-NEXT:    movl %esp, %ebp
; X86-AVX1-NEXT:    pushl %esi
; X86-AVX1-NEXT:    andl $-8, %esp
; X86-AVX1-NEXT:    subl $32, %esp
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX1-NEXT:    fldl (%esp)
; X86-AVX1-NEXT:    fisttpll (%esp)
; X86-AVX1-NEXT:    xorl %eax, %eax
; X86-AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-AVX1-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
; X86-AVX1-NEXT:    movl $0, %edx
; X86-AVX1-NEXT:    jb .LBB19_2
; X86-AVX1-NEXT:  # %bb.1:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT:    movl (%esp), %edx
; X86-AVX1-NEXT:  .LBB19_2:
; X86-AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-AVX1-NEXT:    movl $-1, %esi
; X86-AVX1-NEXT:    cmovbel %edx, %esi
; X86-AVX1-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-AVX1-NEXT:    cmovbel %ecx, %edx
; X86-AVX1-NEXT:    vucomisd %xmm0, %xmm0
; X86-AVX1-NEXT:    cmovpl %eax, %edx
; X86-AVX1-NEXT:    cmovpl %eax, %esi
; X86-AVX1-NEXT:    vmovd %esi, %xmm0
; X86-AVX1-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    leal -4(%ebp), %esp
; X86-AVX1-NEXT:    popl %esi
; X86-AVX1-NEXT:    popl %ebp
; X86-AVX1-NEXT:    retl
  %i = call i64 @llvm.fptosi.sat.i64.f64(double %x)
  %r = sitofp i64 %i to double
  ret double %r
}

attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
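; Note: attribute #0 lets the functions marked with it ignore signed zeros,
; which is a precondition for collapsing the fp->int->fp round trip into a
; single round** instruction; the *_no_fast_math variants omit it and keep the
; explicit conversion pair.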