; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
;
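;
; Background on the NaN constants below (explanatory note, not checked output):
; fabs lowers to an AND with the sign-clear mask 0x7fffffff. Reinterpreted as
; an f32, that mask has an all-ones exponent and a nonzero mantissa, i.e. a
; NaN, which is why the AVX broadcast comments print as [NaN,NaN,NaN,NaN].
;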

; fabs(c1) -> c2
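; Worked sketch of the fold (assumed arithmetic, not checked output):
; -2.0f is 0xc0000000; 0xc0000000 & 0x7fffffff = 0x40000000 = 2.0f, so the
; call constant-folds and only the folded constant is materialized.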
define float @combine_fabs_constant() {
; SSE-LABEL: combine_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    retq
  %1 = call float @llvm.fabs.f32(float -2.0)
  ret float %1
}

define <4 x float> @combine_vec_fabs_constant() {
; SSE-LABEL: combine_vec_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
  ret <4 x float> %1
}

; fabs(fabs(x)) -> fabs(x)
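; Sketch of the rationale (assumed, not checked output): |x| already has a
; cleared sign bit, so ||x|| == |x|; the two intrinsic calls collapse into a
; single AND with the 0x7fffffff sign mask.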
define float @combine_fabs_fabs(float %a) {
; SSE-LABEL: combine_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call float @llvm.fabs.f32(float %a)
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; fabs(fneg(x)) -> fabs(x)
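; Sketch of the rationale (assumed, not checked output): fneg (written here as
; fsub -0.0, %a) only flips the sign bit, and fabs unconditionally clears it:
; (x ^ 0x80000000) & 0x7fffffff == x & 0x7fffffff, so the negation is dead and
; a single sign-mask AND survives.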
define float @combine_fabs_fneg(float %a) {
; SSE-LABEL: combine_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub float -0.0, %a
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; fabs(fcopysign(x, y)) -> fabs(x)
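; Sketch of the rationale (assumed, not checked output): copysign(x, y) is
; (x & 0x7fffffff) | (y & 0x80000000); ANDing that with 0x7fffffff kills the
; %b term entirely, so the copysign folds away and %b goes unused.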
define float @combine_fabs_fcopysign(float %a, float %b) {
; SSE-LABEL: combine_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call float @llvm.copysign.f32(float %a, float %b)
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_vec_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; store(fabs(load())) - convert scalar to integer
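; Sketch of the rationale (assumed, not checked output): the value is only
; loaded, masked, and stored, so the combine stays in the integer domain. A
; same-address RMW narrows to a one-byte AND of the sign byte with $127 =
; 0x7f (offset 7 for f64, offset 1 for the 16-bit types, little-endian);
; store-to-a-different-address variants mask the loaded bits in a GPR instead.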
define void @combine_fabs_int_rmw_f64(ptr %ptr) {
; SSE-LABEL: combine_fabs_int_rmw_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    andb $127, 7(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_rmw_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $127, 7(%rdi)
; AVX-NEXT:    retq
  %1 = load double, ptr %ptr
  %2 = call double @llvm.fabs.f64(double %1)
  store double %2, ptr %ptr
  ret void
}

define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
; SSE-LABEL: combine_fabs_int_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; SSE-NEXT:    andl (%rdi), %eax
; SSE-NEXT:    movl %eax, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; AVX-NEXT:    andl (%rdi), %eax
; AVX-NEXT:    movl %eax, (%rsi)
; AVX-NEXT:    retq
  %1 = load float, ptr %src
  %2 = call float @llvm.fabs.f32(float %1)
  store float %2, ptr %dst
  ret void
}

define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
; SSE-LABEL: combine_fabs_int_rmw_bfloat:
; SSE:       # %bb.0:
; SSE-NEXT:    andb $127, 1(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_rmw_bfloat:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $127, 1(%rdi)
; AVX-NEXT:    retq
  %1 = load bfloat, ptr %ptr
  %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
  store bfloat %2, ptr %ptr
  ret void
}

define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: combine_fabs_int_half:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $32767, %eax # imm = 0x7FFF
; SSE-NEXT:    movw %ax, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_half:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    andl $32767, %eax # imm = 0x7FFF
; AVX-NEXT:    movw %ax, (%rsi)
; AVX-NEXT:    retq
  %1 = load half, ptr %src
  %2 = call half @llvm.fabs.f16(half %1)
  store half %2, ptr %dst
  ret void
}

; don't convert vector to scalar
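; Sketch of the rationale (assumed, not checked output): a v4f32 fabs is
; already a single andps with a constant-pool mask (or an AVX broadcast), so
; round-tripping the vector through integer GPRs would only add instructions;
; the integer-domain narrowing above is applied to scalars only.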
define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
; SSE-LABEL: combine_fabs_vec_int_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movaps %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_vec_int_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%rsi)
; AVX-NEXT:    retq
  %1 = load <4 x float>, ptr %src
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  store <4 x float> %2, ptr %dst
  ret void
}

declare float @llvm.fabs.f32(float %p)
declare float @llvm.copysign.f32(float %Mag, float %Sgn)

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)