xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fold-fabs.ll (revision f78b3466caa9296b32ec235dee87ace2dea94507)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -disable-machine-sink=1 - < %s | FileCheck -check-prefix=GFX10 %s
3
; Baseline case: a fabs is computed in the entry block and used both there
; (by the fmul that feeds the compare) and in the conditionally executed %if
; block. In the expected output the fabs shows up only as |v1| source
; modifiers on the v_add_f32_e64 and v_mul_f32_e64 users; no separate
; instruction computing the absolute value is checked.
4define float @fold_abs_in_branch(float %arg1, float %arg2) {
5; GFX10-LABEL: fold_abs_in_branch:
6; GFX10:       ; %bb.0: ; %entry
7; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
9; GFX10-NEXT:    s_mov_b32 s4, exec_lo
10; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
11; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
12; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
13; GFX10-NEXT:  ; %bb.1: ; %if
14; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
15; GFX10-NEXT:  ; %bb.2: ; %exit
16; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
17; GFX10-NEXT:    s_setpc_b64 s[30:31]
18entry:
19  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
20  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
; %2 is the fabs under test. %3 (entry) and %if.3 (%if) are its two users.
21  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
; The multiply by 2.0 corresponds to the v_add_f32_e64 of |v1| with itself above.
22  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
23  %4 = fcmp ule float %3, 1.000000e+00
24  br i1 %4, label %if, label %exit
25
26if:
; The constant below is the literal that appears as 0x3e4ccccd in the expected v_mul.
27  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
28  br label %exit
29
30exit:
; Result is %3 when the branch is skipped, %if.3 when %if ran.
31  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
32  ret float %ret
33}
34
; Same entry/%if structure as fold_abs_in_branch, plus a third user of the
; fabs in %exit (an fadd with 2.0). The expected output carries the fabs as a
; |v0| source modifier on all three users: the v_add in the entry block, the
; v_mul in %if, and the v_add_f32_e64 after the exec mask is restored.
35define float @fold_abs_in_branch_multiple_users(float %arg1, float %arg2) {
36; GFX10-LABEL: fold_abs_in_branch_multiple_users:
37; GFX10:       ; %bb.0: ; %entry
38; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
40; GFX10-NEXT:    s_mov_b32 s4, exec_lo
41; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
42; GFX10-NEXT:    v_add_f32_e64 v1, |v0|, |v0|
43; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v1
44; GFX10-NEXT:  ; %bb.1: ; %if
45; GFX10-NEXT:    v_mul_f32_e64 v1, 0x3e4ccccd, |v0|
46; GFX10-NEXT:  ; %bb.2: ; %exit
47; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
48; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, 2.0
49; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
50; GFX10-NEXT:    s_setpc_b64 s[30:31]
51entry:
52  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
53  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
; %2 is the fabs under test; it is used in %entry, %if and %exit.
54  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
55  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
56  %4 = fcmp ule float %3, 1.000000e+00
57  br i1 %4, label %if, label %exit
58
59if:
60  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
61  br label %exit
62
63exit:
64  %exit.phi = phi float [ %3, %entry ], [ %if.3, %if ]
; Third use of the fabs, after the control-flow join.
65  %ret.0 = fadd reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
; This final fmul is the only floating-point operation here without fast-math flags.
66  %ret.1 = fmul float %ret.0, %exit.phi
67  ret float %ret.1
68}
69
; Variant where the fabs operand is undef instead of %1. The two fadds in the
; entry block are left without users, and the expected output contains no
; v_add of the incoming arguments. The fabs users read |s4| instead, and the
; branch is a scalar s_cbranch_vccnz rather than the v_cmpx / exec-mask
; sequence seen in fold_abs_in_branch.
70define float @fold_abs_in_branch_undef(float %arg1, float %arg2) {
71; GFX10-LABEL: fold_abs_in_branch_undef:
72; GFX10:       ; %bb.0: ; %entry
73; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX10-NEXT:    v_add_f32_e64 v0, |s4|, |s4|
75; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 1.0, v0
76; GFX10-NEXT:    s_cbranch_vccnz .LBB2_2
77; GFX10-NEXT:  ; %bb.1: ; %if
78; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |s4|
79; GFX10-NEXT:  .LBB2_2: ; %exit
80; GFX10-NEXT:    s_setpc_b64 s[30:31]
81entry:
; %0 and %1 have no users in this function; the fabs below takes undef.
82  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
83  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
84  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float undef)
85  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
86  %4 = fcmp ule float %3, 1.000000e+00
87  br i1 %4, label %if, label %exit
88
89if:
90  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
91  br label %exit
92
93exit:
94  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
95  ret float %ret
96}
97
; Variant where the fabs operand is poison. The expected output for this
; function consists only of the entry s_waitcnt and the return; none of the
; arithmetic, compare or branch instructions from the IR below are checked.
98define float @fold_abs_in_branch_poison(float %arg1, float %arg2) {
99; GFX10-LABEL: fold_abs_in_branch_poison:
100; GFX10:       ; %bb.0: ; %entry
101; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX10-NEXT:    s_setpc_b64 s[30:31]
103entry:
; %0 and %1 have no users in this function; the fabs below takes poison.
104  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
105  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
106  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float poison)
107  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
108  %4 = fcmp ule float %3, 1.000000e+00
109  br i1 %4, label %if, label %exit
110
111if:
112  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
113  br label %exit
114
115exit:
116  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
117  ret float %ret
118}
119
; Variant of fold_abs_in_branch where %if applies a second fabs to the
; already-absolute value %2 before multiplying. The expected instruction
; sequence is the same as for fold_abs_in_branch: a single |v1| modifier on
; the v_mul in %if and no extra instruction for the nested fabs.
120define float @fold_abs_in_branch_fabs(float %arg1, float %arg2) {
121; GFX10-LABEL: fold_abs_in_branch_fabs:
122; GFX10:       ; %bb.0: ; %entry
123; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
125; GFX10-NEXT:    s_mov_b32 s4, exec_lo
126; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
127; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
128; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
129; GFX10-NEXT:  ; %bb.1: ; %if
130; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
131; GFX10-NEXT:  ; %bb.2: ; %exit
132; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
133; GFX10-NEXT:    s_setpc_b64 s[30:31]
134entry:
135  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
136  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
137  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
138  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
139  %4 = fcmp ule float %3, 1.000000e+00
140  br i1 %4, label %if, label %exit
141
142if:
; fabs of a value that is itself the result of fabs (%2).
143  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %2)
144  %if.3 = fmul reassoc nnan nsz arcp contract afn float %if.fabs, 0x3FC99999A0000000
145  br label %exit
146
147exit:
148  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
149  ret float %ret
150}
151
; Loop variant: the fabs result %l.fabs is carried around the %header/%l loop
; through a phi (whose entry value is undef) and is also the value returned on
; loop exit. Here the expected output keeps the absolute value as an explicit
; v_and_b32 with 0x7fffffff inside the loop, while the entry-block fabs still
; appears as |v0| source modifiers on the v_add_f32_e64.
152define float @fold_abs_in_branch_phi(float %arg1, float %arg2) {
153; GFX10-LABEL: fold_abs_in_branch_phi:
154; GFX10:       ; %bb.0: ; %entry
155; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
157; GFX10-NEXT:    s_mov_b32 s4, exec_lo
158; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
159; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, |v0|
160; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
161; GFX10-NEXT:    s_cbranch_execz .LBB5_3
162; GFX10-NEXT:  ; %bb.1: ; %header.preheader
163; GFX10-NEXT:    ; implicit-def: $vgpr0
164; GFX10-NEXT:  .LBB5_2: ; %header
165; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
166; GFX10-NEXT:    v_mul_f32_e32 v0, 0x40400000, v0
167; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, -1.0, v0
168; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
169; GFX10-NEXT:    s_cbranch_vccnz .LBB5_2
170; GFX10-NEXT:  .LBB5_3: ; %Flow1
171; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
172; GFX10-NEXT:    s_setpc_b64 s[30:31]
173entry:
174  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
175  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
176  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
177  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
178  %4 = fcmp ule float %3, 1.000000e+00
179  br i1 %4, label %header, label %exit
180
181header:
; Loop-carried value: undef on first entry, the previous iteration's fabs afterwards.
182  %h.fabs.phi = phi float [ undef, %entry ], [ %l.fabs, %l ]
; %h.fmul has no users in this function.
183  %h.fmul = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 2.000000e+00
184  %l.1 = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 3.000000e+00
185  br label %l
186
187l:
; The compare uses %l.1 before fabs is applied; the fabs result feeds the phis only.
188  %l.e = fcmp ule float %l.1, -1.000000e+00
189  %l.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %l.1)
190  br i1 %l.e, label %exit, label %header
191
192exit:
193  %ret = phi float [ %3, %entry ], [ %l.fabs, %l ]
194  ret float %ret
195}
196
; fneg counterpart of the fabs tests: %1 is negated in the entry block, and
; the negated value is used in %if (as the fdiv divisor, next to a fabs of the
; same %1) and again in %exit. In the expected output the negation appears as
; a -v0 source modifier on v_rcp_f32_e64 and on the final v_mul_f32_e64, and
; the fabs as |v0| on the v_mul in %if; the fdiv is checked as rcp + mul.
197define float @fold_neg_in_branch(float %arg1, float %arg2) {
198; GFX10-LABEL: fold_neg_in_branch:
199; GFX10:       ; %bb.0: ; %entry
200; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
202; GFX10-NEXT:    s_mov_b32 s4, exec_lo
203; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
204; GFX10-NEXT:    v_mov_b32_e32 v1, v0
205; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
206; GFX10-NEXT:  ; %bb.1: ; %if
207; GFX10-NEXT:    v_rcp_f32_e64 v1, -v0
208; GFX10-NEXT:    v_mul_f32_e64 v1, |v0|, v1
209; GFX10-NEXT:  ; %bb.2: ; %exit
210; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
211; GFX10-NEXT:    v_mul_f32_e64 v0, -v0, v1
212; GFX10-NEXT:    s_setpc_b64 s[30:31]
213entry:
214  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
215  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
; %2 is the fneg under test; it is used in %if and in %exit.
216  %2 = fneg reassoc nnan nsz arcp contract afn float %1
; The branch condition tests the un-negated %1.
217  %3 = fcmp ule float %1, 1.000000e+00
218  br i1 %3, label %if, label %exit
219
220if:
221  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
222  %if.3 = fdiv reassoc nnan nsz arcp contract afn float %if.fabs, %2
223  br label %exit
224
225exit:
; Incoming value from %entry is %1 itself, not the negated %2.
226  %ret = phi float [ %1, %entry ], [ %if.3, %if ]
227  %ret.2 = fmul reassoc nnan nsz arcp contract afn float %2, %ret
228  ret float %ret.2
229}
230
; Intrinsic declarations. @llvm.fabs.f32 is called by the functions above.
; @llvm.fmuladd.f32 is not called anywhere in the visible part of this file,
; and no definition of attribute group #0 is visible here either.
; TODO(review): confirm whether this declaration is still needed.
231declare float @llvm.fabs.f32(float)
232declare float @llvm.fmuladd.f32(float, float, float) #0
233