; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this file is generated by utils/update_llc_test_checks.py, but we can't
; check NaN constants (PR30443), so we need to edit it to remove the NaN
; constant comments.
;

; copysign(x, c1) -> fabs(x) iff ispos(c1)
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
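
; A minimal scalar sketch (illustrative only, not exercised by the RUN lines
; above; the function name is hypothetical): with a known-positive sign
; operand the copysign reduces to fabs, i.e. a single and that clears the
; sign bit. The 0x7FFFFFFF mask reinterpreted as f32 is a NaN bit pattern,
; which is why the broadcast constants in the AVX checks print as
; [NaN,NaN,NaN,NaN].
define float @sketch_fabs_as_and(float %x) {
  %bits = bitcast float %x to i32
  %abs = and i32 %bits, 2147483647 ; 0x7FFFFFFF clears the sign bit
  %res = bitcast i32 %abs to float
  ret float %res
}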

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}
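
; Scalar sketch (illustrative only, hypothetical helper): a known-negative
; sign operand means the result is fneg(fabs(x)), i.e. the sign bit is
; unconditionally set, so the whole copysign collapses to a single or with
; -0.0 (0x80000000) per lane, matching the orps in the checks above.
define float @sketch_fneg_fabs_as_or(float %x) {
  %bits = bitcast float %x to i32
  %neg = or i32 %bits, -2147483648 ; 0x80000000 sets the sign bit
  %res = bitcast i32 %neg to float
  ret float %res
}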

; copysign(fabs(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}
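
; Scalar sketch (illustrative only, hypothetical helper) of the generic
; two-operand expansion seen in the checks above: the magnitude bits come
; from x and the sign bit comes from y. An fabs or fneg on the magnitude
; operand is dead because the and with 0x7FFFFFFF discards x's sign bit
; anyway, which is why both folds can drop the inner operation.
define float @sketch_copysign_expansion(float %x, float %y) {
  %xb = bitcast float %x to i32
  %yb = bitcast float %y to i32
  %mag = and i32 %xb, 2147483647  ; keep magnitude of x (0x7FFFFFFF)
  %sgn = and i32 %yb, -2147483648 ; keep sign of y (0x80000000)
  %com = or i32 %mag, %sgn
  %res = bitcast i32 %com to float
  ret float %res
}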

; copysign(copysign(x,z), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
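
; Scalar sketch (illustrative only, hypothetical helper) of why both nested
; folds are sound: copysign fully rewrites the sign bit of its first operand
; and reads only the sign bit of its second, so an inner copysign on the
; magnitude side is dead, and on the sign side only the innermost sign source
; (z) survives; note that the _sgn checks above read the sign mask from
; %xmm2 (z), not %xmm1 (y).
define float @sketch_nested_copysign(float %x, float %y, float %z) {
  %inner = call float @llvm.copysign.f32(float %y, float %z) ; sign == sign(z)
  %outer = call float @llvm.copysign.f32(float %x, float %inner)
  ret float %outer ; equivalent to copysign(x, z)
}
declare float @llvm.copysign.f32(float, float)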

; copysign(x, fp_extend(y)) -> copysign(x, y)
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm2, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtps2pd %xmm2, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm3, %xmm5
; SSE-NEXT:    orps %xmm5, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm4
; SSE-NEXT:    orps %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm1, %ymm1
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm2, %xmm2
; SSE-NEXT:    cvtpd2ps %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}
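
; Scalar sketch (illustrative only, hypothetical helper): both fp_extend and
; fp_round preserve the sign bit, so the sign can be read straight from the
; unconverted source, keeping the conversion off the sign path; the helper
; below shows the f32-sign-into-f64 direction by hand.
define double @sketch_sign_from_f32(double %x, float %y) {
  %xb = bitcast double %x to i64
  %yb = bitcast float %y to i32
  %ybit = lshr i32 %yb, 31                ; sign bit of the f32 source
  %yext = zext i32 %ybit to i64
  %sgn = shl i64 %yext, 63                ; move it to the f64 sign position
  %mag = and i64 %xb, 9223372036854775807 ; 0x7FFFFFFFFFFFFFFF, magnitude of x
  %com = or i64 %mag, %sgn
  %res = bitcast i64 %com to double
  ret double %res
}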

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)