; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -disable-machine-sink=1 - < %s | FileCheck -check-prefix=GFX10 %s

; Tests for fabs/fneg values that are defined in one basic block and used in
; another. The expected assembly shows where the operation is emitted as an
; operand source modifier (|vN| / -vN) and where it is not.

; fabs (%2) is computed in %entry and used both there and in %if. The expected
; output applies the |v1| modifier at both uses.
define float @fold_abs_in_branch(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_mov_b32 s4, exec_lo
; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %if, label %exit

if:
  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
  br label %exit

exit:
  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
  ret float %ret
}

; Same shape as fold_abs_in_branch, but the fabs has an additional user in
; %exit (%ret.0). The expected output uses the |v0| modifier at every use.
define float @fold_abs_in_branch_multiple_users(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch_multiple_users:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_mov_b32 s4, exec_lo
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_add_f32_e64 v1, |v0|, |v0|
; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v1
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_mul_f32_e64 v1, 0x3e4ccccd, |v0|
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, 2.0
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %if, label %exit

if:
  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
  br label %exit

exit:
  %exit.phi = phi float [ %3, %entry ], [ %if.3, %if ]
  %ret.0 = fadd reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %ret.1 = fmul float %ret.0, %exit.phi
  ret float %ret.1
}

; The fabs operand is undef, so %0 and %1 are dead. The expected output reads
; s4 (never written in this function) with the abs modifier and branches with
; s_cbranch_vccnz instead of masking exec.
define float @fold_abs_in_branch_undef(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch_undef:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e64 v0, |s4|, |s4|
; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 1.0, v0
; GFX10-NEXT:    s_cbranch_vccnz .LBB2_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |s4|
; GFX10-NEXT:  .LBB2_2: ; %exit
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float undef)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %if, label %exit

if:
  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
  br label %exit

exit:
  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
  ret float %ret
}

; The fabs operand is poison. The expected output contains only the entry
; s_waitcnt and the return.
define float @fold_abs_in_branch_poison(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch_poison:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float poison)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %if, label %exit

if:
  %if.3 = fmul reassoc nnan nsz arcp contract afn float %2, 0x3FC99999A0000000
  br label %exit

exit:
  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
  ret float %ret
}

; %if applies a second fabs to the already-fabs'ed value (%if.fabs). The
; expected instructions are the same as for fold_abs_in_branch.
define float @fold_abs_in_branch_fabs(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch_fabs:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_mov_b32 s4, exec_lo
; GFX10-NEXT:    v_add_f32_e32 v1, v0, v1
; GFX10-NEXT:    v_add_f32_e64 v0, |v1|, |v1|
; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_mul_f32_e64 v0, 0x3e4ccccd, |v1|
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %if, label %exit

if:
  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %2)
  %if.3 = fmul reassoc nnan nsz arcp contract afn float %if.fabs, 0x3FC99999A0000000
  br label %exit

exit:
  %ret = phi float [ %3, %entry ], [ %if.3, %if ]
  ret float %ret
}

; The in-loop fabs (%l.fabs) feeds a phi in the loop header and the phi in
; %exit. The expected output keeps it as an explicit v_and_b32 with 0x7fffffff
; inside the loop; only the fabs in %entry becomes a |v0| modifier.
define float @fold_abs_in_branch_phi(float %arg1, float %arg2) {
; GFX10-LABEL: fold_abs_in_branch_phi:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_mov_b32 s4, exec_lo
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_add_f32_e64 v0, |v0|, |v0|
; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
; GFX10-NEXT:    s_cbranch_execz .LBB5_3
; GFX10-NEXT:  ; %bb.1: ; %header.preheader
; GFX10-NEXT:    ; implicit-def: $vgpr0
; GFX10-NEXT:  .LBB5_2: ; %header
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_mul_f32_e32 v0, 0x40400000, v0
; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, -1.0, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX10-NEXT:    s_cbranch_vccnz .LBB5_2
; GFX10-NEXT:  .LBB5_3: ; %Flow1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  %3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
  %4 = fcmp ule float %3, 1.000000e+00
  br i1 %4, label %header, label %exit

header:
  %h.fabs.phi = phi float [ undef, %entry ], [ %l.fabs, %l ]
  %h.fmul = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 2.000000e+00
  %l.1 = fmul reassoc nnan nsz arcp contract afn float %h.fabs.phi, 3.000000e+00
  br label %l

l:
  %l.e = fcmp ule float %l.1, -1.000000e+00
  %l.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %l.1)
  br i1 %l.e, label %exit, label %header

exit:
  %ret = phi float [ %3, %entry ], [ %l.fabs, %l ]
  ret float %ret
}

; fneg (%2) is computed in %entry and used in %if and %exit; a fabs of the same
; value is computed in %if. The expected output uses the -v0 and |v0| modifiers
; at their uses.
define float @fold_neg_in_branch(float %arg1, float %arg2) {
; GFX10-LABEL: fold_neg_in_branch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_mov_b32 s4, exec_lo
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_cmpx_nlt_f32_e32 1.0, v0
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_rcp_f32_e64 v1, -v0
; GFX10-NEXT:    v_mul_f32_e64 v1, |v0|, v1
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_mul_f32_e64 v0, -v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
  %1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
  %2 = fneg reassoc nnan nsz arcp contract afn float %1
  %3 = fcmp ule float %1, 1.000000e+00
  br i1 %3, label %if, label %exit

if:
  %if.fabs = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %1)
  %if.3 = fdiv reassoc nnan nsz arcp contract afn float %if.fabs, %2
  br label %exit

exit:
  %ret = phi float [ %1, %entry ], [ %if.3, %if ]
  %ret.2 = fmul reassoc nnan nsz arcp contract afn float %2, %ret
  ret float %ret.2
}

declare float @llvm.fabs.f32(float)
; NOTE(review): @llvm.fmuladd.f32 is not called by any function visible in this
; file, and attribute group #0 has no definition in the visible text. Confirm
; whether this declaration is still needed.
declare float @llvm.fmuladd.f32(float, float, float) #0
afn float %if.fabs, %2 22391a7aa4cSThomas Symalla br label %exit 22491a7aa4cSThomas Symalla 22591a7aa4cSThomas Symallaexit: 22691a7aa4cSThomas Symalla %ret = phi float [ %1, %entry ], [ %if.3, %if ] 22791a7aa4cSThomas Symalla %ret.2 = fmul reassoc nnan nsz arcp contract afn float %2, %ret 22891a7aa4cSThomas Symalla ret float %ret.2 22991a7aa4cSThomas Symalla} 23091a7aa4cSThomas Symalla 23180f442e1SThomas Symalladeclare float @llvm.fabs.f32(float) 23280f442e1SThomas Symalladeclare float @llvm.fmuladd.f32(float, float, float) #0 233