xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fold-fabs.ll (revision f78b3466caa9296b32ec235dee87ace2dea94507)
180f442e1SThomas Symalla; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2*9e9907f1SFangrui Song; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -disable-machine-sink=1 - < %s | FileCheck -check-prefix=GFX10 %s
380f442e1SThomas Symalla
; Test: the llvm.fabs result %2 is consumed by an fmul in %entry and by a
; second fmul in %if. The expected output shows the absolute value as a |v1|
; source modifier on both the v_add_f32_e64 in %entry and the v_mul_f32_e64 in
; %if, with no separate instruction computing it.
480f442e1SThomas Symalladefine float @fold_abs_in_branch(float %arg1, float %arg2) {
580f442e1SThomas Symalla; GFX10-LABEL: fold_abs_in_branch:
680f442e1SThomas Symalla; GFX10:       ; %bb.0: ; %entry
780f442e1SThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
980f442e1SThomas Symalla; GFX10-NEXT:    s_mov_b32 s4, exec_lo
1080f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
1180f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
1280f442e1SThomas Symalla; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
1380f442e1SThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %if
1491a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
1580f442e1SThomas Symalla; GFX10-NEXT:  ; %bb.2: ; %exit
1680f442e1SThomas Symalla; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
1780f442e1SThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
1880f442e1SThomas Symallaentry:
1980f442e1SThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
2080f442e1SThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  ; %2 is the fabs value whose uses span two basic blocks.
2180f442e1SThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  ; The multiply by 2.0 appears in the expected output as an add of |v1| to itself.
2280f442e1SThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
2380f442e1SThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
2480f442e1SThomas Symalla  br i1 %4, label %if, label %exit
2580f442e1SThomas Symalla
2680f442e1SThomas Symallaif:
  ; Use of %2 outside its defining block; the constant is shown as 0x3e4ccccd
  ; in the expected output.
2780f442e1SThomas Symalla  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
2880f442e1SThomas Symalla  br label %exit
2980f442e1SThomas Symalla
3080f442e1SThomas Symallaexit:
3180f442e1SThomas Symalla  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
3280f442e1SThomas Symalla  ret float %ret
3380f442e1SThomas Symalla}
3480f442e1SThomas Symalla
; Test: same shape as a single-branch fabs fold, but the fabs result %2 has a
; third user in %exit (an fadd with 2.0). The expected output shows a |v0|
; source modifier on all three users: the v_add_f32_e64 in %entry, the
; v_mul_f32_e64 in %if and the v_add_f32_e64 in %exit.
35d262a11aSThomas Symalladefine float @fold_abs_in_branch_multiple_users(float %arg1, float %arg2) {
36d262a11aSThomas Symalla; GFX10-LABEL: fold_abs_in_branch_multiple_users:
37d262a11aSThomas Symalla; GFX10:       ; %bb.0: ; %entry
38d262a11aSThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39d262a11aSThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
40d262a11aSThomas Symalla; GFX10-NEXT:    s_mov_b32 s4, exec_lo
4191a7aa4cSThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
4291a7aa4cSThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v1, |v0|, |v0|
4391a7aa4cSThomas Symalla; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v1
44d262a11aSThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %if
4591a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v1, 0x3e4ccccd, |v0|
46d262a11aSThomas Symalla; GFX10-NEXT:  ; %bb.2: ; %exit
47d262a11aSThomas Symalla; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
4891a7aa4cSThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, 2.0
4991a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
50d262a11aSThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
51d262a11aSThomas Symallaentry:
52d262a11aSThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
53d262a11aSThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  ; %2 is used in %entry, %if and %exit.
54d262a11aSThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
55d262a11aSThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
56d262a11aSThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
57d262a11aSThomas Symalla  br i1 %4, label %if, label %exit
58d262a11aSThomas Symalla
59d262a11aSThomas Symallaif:
60d262a11aSThomas Symalla  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
61d262a11aSThomas Symalla  br label %exit
62d262a11aSThomas Symalla
63d262a11aSThomas Symallaexit:
64d262a11aSThomas Symalla  %exit.phi = phi float [ %3, %entry ], [ %if.3, %if ]
  ; Additional user of %2 after the control flow has rejoined.
65d262a11aSThomas Symalla  %ret.0 = fadd reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  ; Unlike the other arithmetic in this function, this fmul carries no fast-math flags.
66d262a11aSThomas Symalla  %ret.1 = fmul float %ret.0, %exit.phi
67d262a11aSThomas Symalla  ret float %ret.1
68d262a11aSThomas Symalla}
69d262a11aSThomas Symalla
; Test: llvm.fabs is applied to an undef operand instead of a computed value.
; The expected output reads |s4| in both the v_add_f32_e64 and the
; v_mul_f32_e64 without any earlier write to s4 in this function, and the
; conditional branch is emitted as a compare into vcc_lo followed by
; s_cbranch_vccnz.
7080f442e1SThomas Symalladefine float @fold_abs_in_branch_undef(float %arg1, float %arg2) {
7180f442e1SThomas Symalla; GFX10-LABEL: fold_abs_in_branch_undef:
7280f442e1SThomas Symalla; GFX10:       ; %bb.0: ; %entry
7380f442e1SThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7480f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v0, |s4|, |s4|
7580f442e1SThomas Symalla; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 1.0, v0
76d262a11aSThomas Symalla; GFX10-NEXT:    s_cbranch_vccnz .LBB2_2
7780f442e1SThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %if
7891a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |s4|
79d262a11aSThomas Symalla; GFX10-NEXT:  .LBB2_2: ; %exit
8080f442e1SThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
81d262a11aSThomas Symallaentry:
  ; %0 and %1 have no users in this function; the expected output contains no
  ; instructions for them.
8280f442e1SThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
8380f442e1SThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  ; fabs of undef: the operand under test.
8480f442e1SThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float undef)
8580f442e1SThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
8680f442e1SThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
8780f442e1SThomas Symalla  br i1 %4, label %if, label %exit
8880f442e1SThomas Symalla
8980f442e1SThomas Symallaif:
9080f442e1SThomas Symalla  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
9180f442e1SThomas Symalla  br label %exit
9280f442e1SThomas Symalla
9380f442e1SThomas Symallaexit:
9480f442e1SThomas Symalla  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
9580f442e1SThomas Symalla  ret float %ret
9680f442e1SThomas Symalla}
9780f442e1SThomas Symalla
; Test: llvm.fabs is applied to a poison operand. The expected output for the
; whole function is only the entry s_waitcnt followed by the return
; (s_setpc_b64); no arithmetic, compare or branch is emitted.
9880f442e1SThomas Symalladefine float @fold_abs_in_branch_poison(float %arg1, float %arg2) {
9980f442e1SThomas Symalla; GFX10-LABEL: fold_abs_in_branch_poison:
10080f442e1SThomas Symalla; GFX10:       ; %bb.0: ; %entry
10180f442e1SThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10280f442e1SThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
10380f442e1SThomas Symallaentry:
  ; %0 and %1 have no users in this function.
10480f442e1SThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
10580f442e1SThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  ; fabs of poison: every later value, including the branch condition, depends on it.
10680f442e1SThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float poison)
10780f442e1SThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
10880f442e1SThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
10980f442e1SThomas Symalla  br i1 %4, label %if, label %exit
11080f442e1SThomas Symalla
11180f442e1SThomas Symallaif:
11280f442e1SThomas Symalla  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
11380f442e1SThomas Symalla  br label %exit
11480f442e1SThomas Symalla
11580f442e1SThomas Symallaexit:
11680f442e1SThomas Symalla  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
11780f442e1SThomas Symalla  ret float %ret
11880f442e1SThomas Symalla}
11980f442e1SThomas Symalla
; Test: a second llvm.fabs is applied in %if to a value that is already the
; result of llvm.fabs (%2). The expected output contains a single |v1| source
; modifier on the v_mul_f32_e64 in %if and no extra instruction for the
; repeated absolute value.
12080f442e1SThomas Symalladefine float @fold_abs_in_branch_fabs(float %arg1, float %arg2) {
12180f442e1SThomas Symalla; GFX10-LABEL: fold_abs_in_branch_fabs:
12280f442e1SThomas Symalla; GFX10:       ; %bb.0: ; %entry
12380f442e1SThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12480f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
12580f442e1SThomas Symalla; GFX10-NEXT:    s_mov_b32 s4, exec_lo
12680f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
12780f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
12880f442e1SThomas Symalla; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
12980f442e1SThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %if
13060a4cb70SThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
13180f442e1SThomas Symalla; GFX10-NEXT:  ; %bb.2: ; %exit
13280f442e1SThomas Symalla; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
13380f442e1SThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
13480f442e1SThomas Symallaentry:
13580f442e1SThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
13680f442e1SThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
13780f442e1SThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
13880f442e1SThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
13980f442e1SThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
14080f442e1SThomas Symalla  br i1 %4, label %if, label %exit
14180f442e1SThomas Symalla
14280f442e1SThomas Symallaif:
  ; fabs of a value that is itself a fabs result.
14380f442e1SThomas Symalla  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %2)
14480f442e1SThomas Symalla  %if.3 = fmul reassoc nnan nsz arcp contract afn float %if.fabs, 0x3FC99999A0000000
14580f442e1SThomas Symalla  br label %exit
14680f442e1SThomas Symalla
14780f442e1SThomas Symallaexit:
14880f442e1SThomas Symalla  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
14980f442e1SThomas Symalla  ret float %ret
15080f442e1SThomas Symalla}
15180f442e1SThomas Symalla
; Test: an llvm.fabs result (%l.fabs) is carried around a loop through a phi in
; %header and is also the value returned through the %exit phi. In the
; expected output this absolute value is computed by an explicit v_and_b32
; with 0x7fffffff inside the loop rather than appearing as a |...| source
; modifier; the fabs in %entry still appears as |v0| on the v_add_f32_e64.
15280f442e1SThomas Symalladefine float @fold_abs_in_branch_phi(float %arg1, float %arg2) {
15380f442e1SThomas Symalla; GFX10-LABEL: fold_abs_in_branch_phi:
15480f442e1SThomas Symalla; GFX10:       ; %bb.0: ; %entry
15580f442e1SThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15680f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
15780f442e1SThomas Symalla; GFX10-NEXT:    s_mov_b32 s4, exec_lo
15880f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
15980f442e1SThomas Symalla; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, |v0|
16080f442e1SThomas Symalla; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
16191a7aa4cSThomas Symalla; GFX10-NEXT:    s_cbranch_execz .LBB5_3
16291a7aa4cSThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %header.preheader
16391a7aa4cSThomas Symalla; GFX10-NEXT:    ; implicit-def: $vgpr0
16491a7aa4cSThomas Symalla; GFX10-NEXT:  .LBB5_2: ; %header
16580f442e1SThomas Symalla; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
16680f442e1SThomas Symalla; GFX10-NEXT:    v_mul_f32_e32 v0, 0x40400000, v0
16780f442e1SThomas Symalla; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, -1.0, v0
16880f442e1SThomas Symalla; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
16991a7aa4cSThomas Symalla; GFX10-NEXT:    s_cbranch_vccnz .LBB5_2
17091a7aa4cSThomas Symalla; GFX10-NEXT:  .LBB5_3: ; %Flow1
17180f442e1SThomas Symalla; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
17280f442e1SThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
17380f442e1SThomas Symallaentry:
17480f442e1SThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
17580f442e1SThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
17680f442e1SThomas Symalla  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
17780f442e1SThomas Symalla  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
17880f442e1SThomas Symalla  %4 = fcmp ule float %3, 1.000000e+00
17980f442e1SThomas Symalla  br i1 %4, label %header, label %exit
18080f442e1SThomas Symalla
18180f442e1SThomas Symallaheader:
  ; Loop-carried value: undef on entry to the loop, %l.fabs on the back edge.
  ; NOTE(review): the "implicit-def: $vgpr0" line in the expected output
  ; presumably corresponds to the undef incoming value - confirm.
18280f442e1SThomas Symalla  %h.fabs.phi = phi float [ undef, %entry ], [ %l.fabs, %l ]
  ; %h.fmul has no users in this function.
18380f442e1SThomas Symalla  %h.fmul = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 2.000000e+00
  ; The 3.0 multiplier is shown as 0x40400000 in the expected output.
18480f442e1SThomas Symalla  %l.1 = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 3.000000e+00
18580f442e1SThomas Symalla  br label %l
18680f442e1SThomas Symalla
18780f442e1SThomas Symallal:
  ; Leave the loop when %l.1 ule -1.0; the expected output branches back to the
  ; loop header while -1.0 < v0.
18880f442e1SThomas Symalla  %l.e = fcmp ule float %l.1, -1.000000e+00
  ; fabs whose result feeds both the %header phi and the %exit phi.
18980f442e1SThomas Symalla  %l.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %l.1)
19080f442e1SThomas Symalla  br i1 %l.e, label %exit, label %header
19180f442e1SThomas Symalla
19280f442e1SThomas Symallaexit:
19380f442e1SThomas Symalla  %ret = phi float [ %3, %entry ], [ %l.fabs, %l ]
19480f442e1SThomas Symalla  ret float %ret
19580f442e1SThomas Symalla}
19680f442e1SThomas Symalla
; Test: an fneg result (%2) defined in %entry is used as the divisor of an fdiv
; in %if and as an fmul operand in %exit, alongside an llvm.fabs of the same
; source value in %if. The expected output shows the negation as a -v0 source
; modifier on the v_rcp_f32_e64 and on the final v_mul_f32_e64, and the
; absolute value as |v0| on the v_mul_f32_e64 in %if. The fdiv appears as
; v_rcp_f32 followed by v_mul_f32.
19791a7aa4cSThomas Symalladefine float @fold_neg_in_branch(float %arg1, float %arg2) {
19891a7aa4cSThomas Symalla; GFX10-LABEL: fold_neg_in_branch:
19991a7aa4cSThomas Symalla; GFX10:       ; %bb.0: ; %entry
20091a7aa4cSThomas Symalla; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20191a7aa4cSThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
20291a7aa4cSThomas Symalla; GFX10-NEXT:    s_mov_b32 s4, exec_lo
20391a7aa4cSThomas Symalla; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
20491a7aa4cSThomas Symalla; GFX10-NEXT:    v_mov_b32_e32 v1, v0
20591a7aa4cSThomas Symalla; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
20691a7aa4cSThomas Symalla; GFX10-NEXT:  ; %bb.1: ; %if
20791a7aa4cSThomas Symalla; GFX10-NEXT:    v_rcp_f32_e64 v1, -v0
20891a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v1, |v0|, v1
20991a7aa4cSThomas Symalla; GFX10-NEXT:  ; %bb.2: ; %exit
21091a7aa4cSThomas Symalla; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
21191a7aa4cSThomas Symalla; GFX10-NEXT:    v_mul_f32_e64 v0, -v0, v1
21291a7aa4cSThomas Symalla; GFX10-NEXT:    s_setpc_b64 s[30:31]
21391a7aa4cSThomas Symallaentry:
21491a7aa4cSThomas Symalla  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
21591a7aa4cSThomas Symalla  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  ; %2 is the negated value; both of its users are in other blocks.
21691a7aa4cSThomas Symalla  %2 = fneg reassoc nnan nsz arcp contract afn float %1
  ; The branch condition tests the un-negated value %1.
21791a7aa4cSThomas Symalla  %3 = fcmp ule float %1, 1.000000e+00
21891a7aa4cSThomas Symalla  br i1 %3, label %if, label %exit
21991a7aa4cSThomas Symalla
22091a7aa4cSThomas Symallaif:
22191a7aa4cSThomas Symalla  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  ; |%1| divided by -%1.
22291a7aa4cSThomas Symalla  %if.3 = fdiv reassoc nnan nsz arcp contract afn float %if.fabs, %2
22391a7aa4cSThomas Symalla  br label %exit
22491a7aa4cSThomas Symalla
22591a7aa4cSThomas Symallaexit:
  ; From %entry the phi carries %1 itself; the expected output copies v0 into
  ; v1 before the compare for this path.
22691a7aa4cSThomas Symalla  %ret = phi float [ %1, %entry ], [ %if.3, %if ]
22791a7aa4cSThomas Symalla  %ret.2 = fmul reassoc nnan nsz arcp contract afn float %2, %ret
22891a7aa4cSThomas Symalla  ret float %ret.2
22991a7aa4cSThomas Symalla}
23091a7aa4cSThomas Symalla
; Intrinsic declarations. llvm.fabs.f32 is called by the test functions in
; this file.
; NOTE(review): llvm.fmuladd.f32 has no call in the visible part of this file,
; and the #0 attribute group it references has no visible definition - confirm
; whether this declaration is still needed.
23180f442e1SThomas Symalladeclare float @llvm.fabs.f32(float)
23280f442e1SThomas Symalladeclare float @llvm.fmuladd.f32(float, float, float) #0
233