; xref: /llvm-project/llvm/test/CodeGen/X86/sse-fcopysign.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

;
; Library Functions
;

; copysignf(%b, %a) libcall: i686 spills the args to the stack and makes a
; real call; x86_64 shuffles xmm0/xmm1 (args arrive swapped) and tail-calls.
define float @tst1(float %a, float %b) nounwind {
; X86-LABEL: tst1:
; X86:       # %bb.0:
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll copysignf
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    retl
;
; X64-LABEL: tst1:
; X64:       # %bb.0:
; X64-NEXT:    movaps %xmm0, %xmm2
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    movaps %xmm2, %xmm1
; X64-NEXT:    jmp copysignf # TAILCALL
  %tmp = tail call float @copysignf( float %b, float %a )
  ret float %tmp
}

; copysign(double) libcall with a computed sign operand (fadd + fpext).
; x86_64 keeps the args in place and tail-calls after the fp extension.
define double @tst2(double %a, float %b, float %c) nounwind {
; X86-LABEL: tst2:
; X86:       # %bb.0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm1
; X86-NEXT:    cvtss2sd %xmm1, %xmm1
; X86-NEXT:    movsd %xmm0, (%esp)
; X86-NEXT:    movsd %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    calll copysign
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: tst2:
; X64:       # %bb.0:
; X64-NEXT:    addss %xmm2, %xmm1
; X64-NEXT:    cvtss2sd %xmm1, %xmm1
; X64-NEXT:    jmp copysign # TAILCALL
  %tmp1 = fadd float %b, %c
  %tmp2 = fpext float %tmp1 to double
  %tmp = tail call double @copysign( double %a, double %tmp2 )
  ret double %tmp
}

; copysignl(x86_fp80) libcall: x86_fp80 is passed in memory on both targets,
; so both sides store via x87 fstpt and make a real (non-tail) call.
define x86_fp80 @tst3(x86_fp80 %a, x86_fp80 %b) nounwind {
; X86-LABEL: tst3:
; X86:       # %bb.0:
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    fldt {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp)
; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
; X86-NEXT:    fstpt (%esp)
; X86-NEXT:    calll copysignl
; X86-NEXT:    addl $24, %esp
; X86-NEXT:    retl
;
; X64-LABEL: tst3:
; X64:       # %bb.0:
; X64-NEXT:    subq $40, %rsp
; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
; X64-NEXT:    fstpt (%rsp)
; X64-NEXT:    callq copysignl
; X64-NEXT:    addq $40, %rsp
; X64-NEXT:    retq
  %tmp = tail call x86_fp80 @copysignl( x86_fp80 %b, x86_fp80 %a )
  ret x86_fp80 %tmp
}

; External libm copysign family used by the libcall tests above.
declare dso_local float @copysignf(float, float)
declare dso_local double @copysign(double, double)
declare dso_local x86_fp80 @copysignl(x86_fp80, x86_fp80)

;
; LLVM Intrinsic
;

; llvm.copysign.f32 is expanded inline: mask magnitude/sign with and-masks
; from constant pool loads (LCPI), then orps. No libcall is emitted.
define float @int1(float %a, float %b) nounwind {
; X86-LABEL: int1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    orps %xmm0, %xmm1
; X86-NEXT:    movss %xmm1, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: int1:
; X64:       # %bb.0:
; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    orps %xmm1, %xmm0
; X64-NEXT:    retq
  %tmp = tail call float @llvm.copysign.f32( float %b, float %a )
  ret float %tmp
}

; llvm.copysign.f64 expanded inline, with the sign operand produced by a
; float add + fpext. i686 realigns the stack (andl $-8) to spill the f64.
define double @int2(double %a, float %b, float %c) nounwind {
; X86-LABEL: int2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    addss 20(%ebp), %xmm0
; X86-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    cvtss2sd %xmm0, %xmm0
; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    orps %xmm1, %xmm0
; X86-NEXT:    movlps %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: int2:
; X64:       # %bb.0:
; X64-NEXT:    addss %xmm2, %xmm1
; X64-NEXT:    cvtss2sd %xmm1, %xmm1
; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    orps %xmm1, %xmm0
; X64-NEXT:    retq
  %tmp1 = fadd float %b, %c
  %tmp2 = fpext float %tmp1 to double
  %tmp = tail call double @llvm.copysign.f64( double %a, double %tmp2 )
  ret double %tmp
}

; llvm.copysign.f80 is expanded with x87 code: take fabs of the magnitude,
; keep +/- copies, test the sign bit of the spilled sign operand, and select
; with fcmovne. No copysignl libcall is emitted.
define x86_fp80 @int3(x86_fp80 %a, x86_fp80 %b) nounwind {
; X86-LABEL: int3:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    fldt {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp)
; X86-NEXT:    fstpt (%esp)
; X86-NEXT:    fabs
; X86-NEXT:    fld %st(0)
; X86-NEXT:    fchs
; X86-NEXT:    testb $-128, {{[0-9]+}}(%esp)
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fcmovne %st(1), %st
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
;
; X64-LABEL: int3:
; X64:       # %bb.0:
; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
; X64-NEXT:    fabs
; X64-NEXT:    fld %st(0)
; X64-NEXT:    fchs
; X64-NEXT:    testb $-128, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fcmovne %st(1), %st
; X64-NEXT:    fstp %st(1)
; X64-NEXT:    retq
  %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 %b, x86_fp80 %a )
  ret x86_fp80 %tmp
}

; Constant folding: copysign(1.0, -2.0) folds to -1.0 at compile time.
define float @cst1() nounwind {
; X86-LABEL: cst1:
; X86:       # %bb.0:
; X86-NEXT:    fld1
; X86-NEXT:    fchs
; X86-NEXT:    retl
;
; X64-LABEL: cst1:
; X64:       # %bb.0:
; X64-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-NEXT:    retq
  %tmp = tail call float @llvm.copysign.f32( float 1.0, float -2.0 )
  ret float %tmp
}

; Constant folding through fpext: copysign(0.0, -2.0) folds to -0.0.
define double @cst2() nounwind {
; X86-LABEL: cst2:
; X86:       # %bb.0:
; X86-NEXT:    fldz
; X86-NEXT:    fchs
; X86-NEXT:    retl
;
; X64-LABEL: cst2:
; X64:       # %bb.0:
; X64-NEXT:    movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; X64-NEXT:    retq
  %tmp1 = fadd float -1.0, -1.0
  %tmp2 = fpext float %tmp1 to double
  %tmp = tail call double @llvm.copysign.f64( double 0.0, double %tmp2 )
  ret double %tmp
}

; Same fold for x86_fp80: result is -0.0, materialized as fldz + fchs.
define x86_fp80 @cst3() nounwind {
; X86-LABEL: cst3:
; X86:       # %bb.0:
; X86-NEXT:    fldz
; X86-NEXT:    fchs
; X86-NEXT:    retl
;
; X64-LABEL: cst3:
; X64:       # %bb.0:
; X64-NEXT:    fldz
; X64-NEXT:    fchs
; X64-NEXT:    retq
  %tmp1 = fadd float -1.0, -1.0
  %tmp2 = fpext float %tmp1 to x86_fp80
  %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 zeroinitializer, x86_fp80 %tmp2 )
  ret x86_fp80 %tmp
}

; Regression test for PR41749: copysign.f80 with an undef sign operand must
; not crash isel. (No nounwind here, hence the .cfi directives on i686.)
define void @PR41749() {
; X86-LABEL: PR41749:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    fldz
; X86-NEXT:    fld %st(0)
; X86-NEXT:    fstpt (%esp)
; X86-NEXT:    testb $-128, {{[0-9]+}}(%esp)
; X86-NEXT:    fld %st(0)
; X86-NEXT:    fchs
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fcmovne %st(1), %st
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    fstpt (%eax)
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: PR41749:
; X64:       # %bb.0:
; X64-NEXT:    fldz
; X64-NEXT:    fld %st(0)
; X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
; X64-NEXT:    testb $-128, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fld %st(0)
; X64-NEXT:    fchs
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fcmovne %st(1), %st
; X64-NEXT:    fstp %st(1)
; X64-NEXT:    fstpt (%rax)
; X64-NEXT:    retq
  %1 = call x86_fp80 @llvm.copysign.f80(x86_fp80 0xK00000000000000000000, x86_fp80 undef)
  store x86_fp80 %1, ptr undef, align 16
  ret void
}

; Intrinsic declarations used by the inline-expansion tests above.
declare dso_local float     @llvm.copysign.f32(float  %Mag, float  %Sgn)
declare dso_local double    @llvm.copysign.f64(double %Mag, double %Sgn)
declare dso_local x86_fp80  @llvm.copysign.f80(x86_fp80 %Mag, x86_fp80 %Sgn)
