; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s

; Checks the IR that amdgpu-atomic-optimizer produces for floating-point
; atomicrmw fadd / fsub on addrspace(1) pointers when the atomic's result is
; unused. Every operation is covered twice: once with a uniform operand (the
; constant 4.0) and once with a divergent operand (a bitcast of the workitem
; id), and each case is run under both the iterative and the dpp strategy.
;
; If the pass output changes, regenerate the expectations with
; utils/update_test_checks.py instead of editing them by hand.

declare i32 @llvm.amdgcn.workitem.id.x()

; fadd with a uniform operand: the expected IR scales the operand by the number
; of active lanes (ctpop of the ballot, converted with uitofp) and branches so
; that only the lane whose mbcnt result is 0 issues the atomic.
define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE:       12:
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP14]]
; IR-ITERATIVE:       14:
; IR-ITERATIVE-NEXT:    ret void
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_value(
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP:       12:
; IR-DPP-NEXT:    [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP14]]
; IR-DPP:       14:
; IR-DPP-NEXT:    ret void
;
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
  ret void
}


; fadd with a divergent operand: the per-lane values are summed first (a
; readlane loop over the ballot bits for the iterative strategy, a chain of
; update.dpp + fadd steps for the dpp strategy), then the lane whose mbcnt
; result is 0 issues one atomic with the combined value.
define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value(
; IR-ITERATIVE-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE:       8:
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP10:%.*]]
; IR-ITERATIVE:       10:
; IR-ITERATIVE-NEXT:    ret void
; IR-ITERATIVE:       ComputeLoop:
; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT:    [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT:    [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE:       ComputeEnd:
; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fadd_div_value(
; IR-DPP-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP:       23:
; IR-DPP-NEXT:    [[TMP24:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP25]]
; IR-DPP:       25:
; IR-DPP-NEXT:    ret void
;
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %divValue = bitcast i32 %id.x to float
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue seq_cst
  ret void
}

; fsub with a uniform operand. The input previously used atomicrmw fadd, which
; made this a duplicate of the fadd uniform test; it now uses fsub so the test
; matches its name. The expected IR keeps the same lane-count scaling as the
; fadd case, with the single remaining atomic being an fsub.
; NOTE(review): expectations were adjusted by hand for the opcode change;
; rerun utils/update_test_checks.py to confirm them against the pass output.
define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value(
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE:       12:
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP14]]
; IR-ITERATIVE:       14:
; IR-ITERATIVE-NEXT:    ret void
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_value(
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP:       12:
; IR-DPP-NEXT:    [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP14]]
; IR-DPP:       14:
; IR-DPP-NEXT:    ret void
;
  %result = atomicrmw fsub ptr addrspace(1) %ptr, float 4.0 seq_cst
  ret void
}


; fsub with a divergent operand: the expected IR combines the per-lane values
; with fadd (the sum of all operands is what gets subtracted) and leaves a
; single atomicrmw fsub issued by the lane whose mbcnt result is 0.
define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value(
; IR-ITERATIVE-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE:       8:
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP10:%.*]]
; IR-ITERATIVE:       10:
; IR-ITERATIVE-NEXT:    ret void
; IR-ITERATIVE:       ComputeLoop:
; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT:    [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT:    [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE:       ComputeEnd:
; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fsub_div_value(
; IR-DPP-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP:       23:
; IR-DPP-NEXT:    [[TMP24:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP25]]
; IR-DPP:       25:
; IR-DPP-NEXT:    ret void
;
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %divValue = bitcast i32 %id.x to float
  %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst
  ret void
}

; All kernels above are compiled for gfx906.
attributes #0 = {"target-cpu"="gfx906"}