1c931f2e6SPravin Jagtap; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2a2dfc9acSpaperchalice; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s 3a2dfc9acSpaperchalice; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s 408701e35SPravin Jagtap 508701e35SPravin Jagtap; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations. 608701e35SPravin Jagtap; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction 708701e35SPravin Jagtap; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes. 8c931f2e6SPravin Jagtap 9c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { 10ae63db78SJay Foad; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe( 11ae63db78SJay Foad; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 12ae63db78SJay Foad; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 13ae63db78SJay Foad; IR: 2: 14ae63db78SJay Foad; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 15ae63db78SJay Foad; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 16ae63db78SJay Foad; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 17ae63db78SJay Foad; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 18ae63db78SJay Foad; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 19ae63db78SJay Foad; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 20ae63db78SJay Foad; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) 
21ae63db78SJay Foad; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 22ae63db78SJay Foad; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float 23ae63db78SJay Foad; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]] 24ae63db78SJay Foad; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 25ae63db78SJay Foad; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 26ae63db78SJay Foad; IR: 14: 27ae63db78SJay Foad; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 28ae63db78SJay Foad; IR-NEXT: br label [[TMP16]] 29ae63db78SJay Foad; IR: 16: 30ae63db78SJay Foad; IR-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 31ae63db78SJay Foad; IR-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) 32ae63db78SJay Foad; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to float 33ae63db78SJay Foad; IR-NEXT: [[TMP20:%.*]] = fmul float [[VAL]], [[TMP19]] 34ae63db78SJay Foad; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP18]], [[TMP20]] 35ae63db78SJay Foad; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 36ae63db78SJay Foad; IR-NEXT: br label [[TMP23]] 37ae63db78SJay Foad; IR: 23: 38ae63db78SJay Foad; IR-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 39ae63db78SJay Foad; IR-NEXT: ret float [[TMP24]] 40c931f2e6SPravin Jagtap; 41c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 42c931f2e6SPravin Jagtap ret float %result 43c931f2e6SPravin Jagtap} 44c931f2e6SPravin Jagtap 45c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { 46c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( 47f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 
@llvm.amdgcn.ps.live() 48ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 49f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 50f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 51f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 52f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 53f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 54f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 55f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 56f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 57f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 58f09360d2SPravin Jagtap; IR-ITERATIVE: 10: 59ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 60f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 61f09360d2SPravin Jagtap; IR-ITERATIVE: 12: 62f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 632a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 64ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP22:%.*]] 65ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 66ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP17]] 67ae63db78SJay Foad; IR-ITERATIVE: 17: 68ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 69ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 70f09360d2SPravin Jagtap; 
IR-ITERATIVE: ComputeLoop: 71ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 72ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 73ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 74ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 75ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 76ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) 77ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) 78ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = fadd float [[ACCUMULATOR]], [[TMP21]] 79ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 80ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 81ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 82ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 83ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 84f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 85ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 86ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 87c931f2e6SPravin Jagtap; 88c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( 89f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 90ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 91f09360d2SPravin Jagtap; IR-DPP: 2: 92f09360d2SPravin Jagtap; IR-DPP-NEXT: 
[[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 93f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 94f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 95f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 96f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 97f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 982a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) 992a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) 1002a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] 1012a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) 1022a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] 1032a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) 1042a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] 1052a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) 1062a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] 1072a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) 1082a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] 1092a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) 1102a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]] 1112a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) 1122a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) 1132a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) 1142a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1152a960716SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1162a960716SVikram Hegde; IR-DPP: 26: 1172a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 1182a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1192a960716SVikram Hegde; IR-DPP: 28: 1202a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1212a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) 1222a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) 1232a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]] 124ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 125ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 126ae63db78SJay Foad; IR-DPP: 34: 127ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 128ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 129c931f2e6SPravin Jagtap; 130c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 131c931f2e6SPravin 
Jagtap ret float %result 132c931f2e6SPravin Jagtap} 133c931f2e6SPravin Jagtap 134e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 { 135e96948a6SMatt Arsenault; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( 136f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]] 137ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 138f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 139f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 140f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 141f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 142f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 143f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 144f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 145ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 146ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 147ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 148ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 149ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 150ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label 
[[TMP14:%.*]], label [[TMP16:%.*]] 151ae63db78SJay Foad; IR-ITERATIVE: 14: 152ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4 1532a960716SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP16]] 1542a960716SVikram Hegde; IR-ITERATIVE: 16: 155ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 156ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 157ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 158ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 159ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 160ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 161ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 162ae63db78SJay Foad; IR-ITERATIVE: 23: 163ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 164ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP24]] 165c931f2e6SPravin Jagtap; 166e96948a6SMatt Arsenault; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( 167f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]] 168ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 169f09360d2SPravin Jagtap; IR-DPP: 2: 170f09360d2SPravin Jagtap; IR-DPP-NEXT: 
[[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 171f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 172f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 173f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 174f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 175f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 176ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 177ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 178ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 179ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 180ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 181ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 182ae63db78SJay Foad; IR-DPP: 14: 183ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4 184ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 185ae63db78SJay Foad; IR-DPP: 16: 186ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 187ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 188ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 189ae63db78SJay 
Foad; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 190ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 191ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 192ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 193ae63db78SJay Foad; IR-DPP: 23: 194ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 195ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP24]] 196c931f2e6SPravin Jagtap; 197c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 198c931f2e6SPravin Jagtap ret float %result 199c931f2e6SPravin Jagtap} 200c931f2e6SPravin Jagtap 201e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 { 202e96948a6SMatt Arsenault; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( 203f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 204ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 205f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 206f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 207f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 208f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 209f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 210f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 
0) #[[ATTR7]] 211f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 212f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 213f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 214f09360d2SPravin Jagtap; IR-ITERATIVE: 10: 215ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("one-as") monotonic, align 4 216f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 217f09360d2SPravin Jagtap; IR-ITERATIVE: 12: 218f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 2192a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 220ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 221ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 222ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP17]] 223ae63db78SJay Foad; IR-ITERATIVE: 17: 224ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 225ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 226f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeLoop: 227ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 228ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 229ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 230ae63db78SJay Foad; 
IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 231ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 232ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 233ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 234ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 235ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 236ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 237ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 238ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 239ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 240f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 241ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 242ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 243c931f2e6SPravin Jagtap; 244e96948a6SMatt Arsenault; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( 245f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 246ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 247f09360d2SPravin Jagtap; IR-DPP: 2: 248f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 249f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 250f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 251f09360d2SPravin 
Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 252f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 253f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 2542a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 2552a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 2562a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 2572a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 2582a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 2592a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 2602a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 2612a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 2622a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 
2632a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 2642a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 2652a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 2662a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 2672a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 2682a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 2692a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 2702a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 2712a960716SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 2722a960716SVikram Hegde; IR-DPP: 26: 2732a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4 2742a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 2752a960716SVikram Hegde; IR-DPP: 28: 2762a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 2772a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 2782a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float 
[[TMP22]]) #[[ATTR8]] 2792a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 280ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 281ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 282ae63db78SJay Foad; IR-DPP: 34: 283ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 284ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 285c931f2e6SPravin Jagtap; 286c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 287c931f2e6SPravin Jagtap ret float %result 288c931f2e6SPravin Jagtap} 289c931f2e6SPravin Jagtap 290c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { 291c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( 292f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 293ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 294f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 295f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 296f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 297f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 298f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 299f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 300f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 301ae63db78SJay Foad; 
IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 302ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 303ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 304ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 305ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 306ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 307ae63db78SJay Foad; IR-ITERATIVE: 14: 308ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 3092a960716SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP16]] 3102a960716SVikram Hegde; IR-ITERATIVE: 16: 311ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 312ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 313ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 314ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 315ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 316ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float 
[[TMP18]], float [[TMP21]] 317ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 318ae63db78SJay Foad; IR-ITERATIVE: 23: 319ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 320ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP24]] 321c931f2e6SPravin Jagtap; 322c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( 323f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 324ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 325f09360d2SPravin Jagtap; IR-DPP: 2: 326f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 327f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 328f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 329f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 330f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 331f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 332ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 333ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 334ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 335ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 336ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 337ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 
338ae63db78SJay Foad; IR-DPP: 14: 339ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 340ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 341ae63db78SJay Foad; IR-DPP: 16: 342ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 343ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 344ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 345ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 346ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 347ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 348ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 349ae63db78SJay Foad; IR-DPP: 23: 350ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 351ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP24]] 352c931f2e6SPravin Jagtap; 353c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 354c931f2e6SPravin Jagtap ret float %result 355c931f2e6SPravin Jagtap} 356c931f2e6SPravin Jagtap 357c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { 358c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( 359f09360d2SPravin Jagtap; 
IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 360ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 361f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 362f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 363f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 364f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 365f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 366f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 367f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 368f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 369f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 370f09360d2SPravin Jagtap; IR-ITERATIVE: 10: 371ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 372f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 373f09360d2SPravin Jagtap; IR-ITERATIVE: 12: 374f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 3752a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 376ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 377ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 378ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label 
[[TMP17]] 379ae63db78SJay Foad; IR-ITERATIVE: 17: 380ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 381ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 382f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeLoop: 383ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 384ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 385ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 386ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 387ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 388ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 389ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 390ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 391ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 392ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 393ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 394ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 395ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 396f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 397ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 398ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 
[[TMP28]], label [[TMP10]], label [[TMP12]] 399c931f2e6SPravin Jagtap; 400c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( 401f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 402ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 403f09360d2SPravin Jagtap; IR-DPP: 2: 404f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 405f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 406f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 407f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 408f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 409f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 4102a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 4112a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 4122a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4132a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 4142a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4152a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 4162a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4172a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 4182a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4192a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 4202a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4212a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 4222a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 4232a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 4242a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 4252a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 4262a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 4272a960716SVikram 
Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 4282a960716SVikram Hegde; IR-DPP: 26: 4292a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 4302a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 4312a960716SVikram Hegde; IR-DPP: 28: 4322a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 4332a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 4342a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 4352a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 436ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 437ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 438ae63db78SJay Foad; IR-DPP: 34: 439ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 440ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 441c931f2e6SPravin Jagtap; 442c931f2e6SPravin Jagtap %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 443c931f2e6SPravin Jagtap ret float %result 444c931f2e6SPravin Jagtap} 445c931f2e6SPravin Jagtap 446c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { 44708701e35SPravin Jagtap; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe( 44808701e35SPravin Jagtap; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 449ae63db78SJay Foad; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 45008701e35SPravin Jagtap; IR: 2: 
45108701e35SPravin Jagtap; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 45208701e35SPravin Jagtap; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 45308701e35SPravin Jagtap; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 45408701e35SPravin Jagtap; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 45508701e35SPravin Jagtap; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 45608701e35SPravin Jagtap; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 45708701e35SPravin Jagtap; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 45808701e35SPravin Jagtap; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 45908701e35SPravin Jagtap; IR: 10: 46008701e35SPravin Jagtap; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 46108701e35SPravin Jagtap; IR-NEXT: br label [[TMP12]] 46208701e35SPravin Jagtap; IR: 12: 46308701e35SPravin Jagtap; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 4642a960716SVikram Hegde; IR-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 4652a960716SVikram Hegde; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to float 4662a960716SVikram Hegde; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 4672a960716SVikram Hegde; IR-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP16]]) 468ae63db78SJay Foad; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 469ae63db78SJay Foad; IR-NEXT: br label [[TMP19]] 470ae63db78SJay Foad; IR: 19: 471ae63db78SJay Foad; IR-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 472ae63db78SJay Foad; IR-NEXT: ret float [[TMP20]] 473c931f2e6SPravin Jagtap; 474c931f2e6SPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 
475c931f2e6SPravin Jagtap ret float %result 476c931f2e6SPravin Jagtap} 477c931f2e6SPravin Jagtap 478c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { 479c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( 480edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 481ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 482edb9fab3SPravin Jagtap; IR-ITERATIVE: 2: 483edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 484edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 485edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 486edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 487edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 488edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 489edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 490edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 491edb9fab3SPravin Jagtap; IR-ITERATIVE: 10: 492ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 493edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 494edb9fab3SPravin Jagtap; IR-ITERATIVE: 12: 495edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 4962a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 497ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call 
float @llvm.minnum.f32(float [[TMP14]], float [[TMP22:%.*]]) 498ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 499ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP17]] 500ae63db78SJay Foad; IR-ITERATIVE: 17: 501ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 502ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 503edb9fab3SPravin Jagtap; IR-ITERATIVE: ComputeLoop: 504ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 505ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 506ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 507ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 508ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 509ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) 510ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) 511ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP21]]) 512ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 513ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 514ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 515ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 516ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 517edb9fab3SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 518ae63db78SJay Foad; 
IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 519ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 520c931f2e6SPravin Jagtap; 521c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( 522edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 523ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 524edb9fab3SPravin Jagtap; IR-DPP: 2: 525edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 526edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 527edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 528edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 529edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 530edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 5312a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) 5322a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) 5332a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]]) 5342a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) 5352a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]]) 5362a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) 5372a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call 
float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]]) 5382a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) 5392a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]]) 5402a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) 5412a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]]) 5422a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) 5432a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]]) 5442a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) 5452a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) 5462a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) 5472a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 5482a960716SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 5492a960716SVikram Hegde; IR-DPP: 26: 5502a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 5512a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 5522a960716SVikram Hegde; IR-DPP: 28: 5532a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 5542a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float 
@llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) 5552a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) 5562a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.minnum.f32(float [[TMP30]], float [[TMP31]]) 557ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 558ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 559ae63db78SJay Foad; IR-DPP: 34: 560ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 561ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 562c931f2e6SPravin Jagtap; 563c931f2e6SPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 564c931f2e6SPravin Jagtap ret float %result 565c931f2e6SPravin Jagtap} 566c931f2e6SPravin Jagtap 567e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{ 568e96948a6SMatt Arsenault; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( 569edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 570ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 571edb9fab3SPravin Jagtap; IR-ITERATIVE: 2: 572edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 573edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 574edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 575edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 576edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 577edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 
[[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 578edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 579edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 580edb9fab3SPravin Jagtap; IR-ITERATIVE: 10: 581edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 582edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12]] 583edb9fab3SPravin Jagtap; IR-ITERATIVE: 12: 584edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 5852a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 5862a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 5872a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 5882a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] 589ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 590ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP19]] 591ae63db78SJay Foad; IR-ITERATIVE: 19: 592ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 593ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP20]] 594c931f2e6SPravin Jagtap; 595e96948a6SMatt Arsenault; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( 596edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 597ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], 
label [[TMP19:%.*]] 598edb9fab3SPravin Jagtap; IR-DPP: 2: 599edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 600edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 601edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 602edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 603edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 604edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 605edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 606edb9fab3SPravin Jagtap; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 607edb9fab3SPravin Jagtap; IR-DPP: 10: 608edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 609edb9fab3SPravin Jagtap; IR-DPP-NEXT: br label [[TMP12]] 610edb9fab3SPravin Jagtap; IR-DPP: 12: 611edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 6122a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]] 6132a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 6142a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 6152a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 616ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 617ae63db78SJay Foad; IR-DPP-NEXT: br label 
[[TMP19]] 618ae63db78SJay Foad; IR-DPP: 19: 619ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 620ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP20]] 621c931f2e6SPravin Jagtap; 622c931f2e6SPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 623c931f2e6SPravin Jagtap ret float %result 624c931f2e6SPravin Jagtap} 625c931f2e6SPravin Jagtap 626e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{ 627e96948a6SMatt Arsenault; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( 628edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 629ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 630edb9fab3SPravin Jagtap; IR-ITERATIVE: 2: 631edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 632edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 633edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 634edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 635edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 636edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 637edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 638edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 639edb9fab3SPravin Jagtap; IR-ITERATIVE: 10: 640ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 
641edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 642edb9fab3SPravin Jagtap; IR-ITERATIVE: 12: 643edb9fab3SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 6442a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 645ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] 646ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 647ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP17]] 648ae63db78SJay Foad; IR-ITERATIVE: 17: 649ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 650ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 651edb9fab3SPravin Jagtap; IR-ITERATIVE: ComputeLoop: 652ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 653ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 654ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 655ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 656ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 657ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 658ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 659ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = call float 
@llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] 660ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 661ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 662ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 663ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 664ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 665edb9fab3SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 666ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 667ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 668c931f2e6SPravin Jagtap; 669e96948a6SMatt Arsenault; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( 670edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 671ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 672edb9fab3SPravin Jagtap; IR-DPP: 2: 673edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 674edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 675edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 676edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 677edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 678edb9fab3SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 6792a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] 6802a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, 
float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 6812a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] 6822a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 6832a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] 6842a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 6852a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] 6862a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 6872a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 6882a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 6892a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] 6902a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 6912a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float 
[[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] 6922a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 6932a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 6942a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 6952a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 6962a960716SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 6972a960716SVikram Hegde; IR-DPP: 26: 6982a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 6992a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 7002a960716SVikram Hegde; IR-DPP: 28: 7012a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 7022a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 7032a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 7042a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] 705ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 706ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 707ae63db78SJay Foad; IR-DPP: 34: 708ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 709ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 710c931f2e6SPravin Jagtap; 711c931f2e6SPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") 
monotonic 712c931f2e6SPravin Jagtap ret float %result 713c931f2e6SPravin Jagtap} 714c931f2e6SPravin Jagtap 715c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { 716c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( 717f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 718ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 719f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 720f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 721f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 722f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 723f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 724f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 725f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 726ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 727ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 728ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 729ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 730ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 731ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label 
[[TMP14:%.*]], label [[TMP16:%.*]] 732ae63db78SJay Foad; IR-ITERATIVE: 14: 733ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4 7342a960716SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP16]] 7352a960716SVikram Hegde; IR-ITERATIVE: 16: 736ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 737ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 738ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 739ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 740ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 741ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 742ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 743ae63db78SJay Foad; IR-ITERATIVE: 23: 744ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 745ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP24]] 746c931f2e6SPravin Jagtap; 747c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( 748f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 749ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 750f09360d2SPravin Jagtap; IR-DPP: 2: 751f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 
@llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 752f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 753f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 754f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 755f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 756f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 757ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 758ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 759ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 760ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 761ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 762ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 763ae63db78SJay Foad; IR-DPP: 14: 764ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4 765ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 766ae63db78SJay Foad; IR-DPP: 16: 767ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 768ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 769ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 770ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = call float 
@llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 771ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 772ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 773ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 774ae63db78SJay Foad; IR-DPP: 23: 775ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 776ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP24]] 777c931f2e6SPravin Jagtap; 778c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 779c931f2e6SPravin Jagtap ret float %result 780c931f2e6SPravin Jagtap} 781c931f2e6SPravin Jagtap 782c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { 783c931f2e6SPravin Jagtap; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( 784f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 785ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 786f09360d2SPravin Jagtap; IR-ITERATIVE: 2: 787f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 788f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 789f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 790f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 791f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 792f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = 
call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 793f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 794f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 795f09360d2SPravin Jagtap; IR-ITERATIVE: 10: 796ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] monotonic, align 4 797f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 798f09360d2SPravin Jagtap; IR-ITERATIVE: 12: 799f09360d2SPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 8002a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 801ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 802ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 803ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP17]] 804ae63db78SJay Foad; IR-ITERATIVE: 17: 805ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 806ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret float [[TMP18]] 807f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeLoop: 808ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 809ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 810ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 811ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 
812ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 813ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 814ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 815ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 816ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 817ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 818ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 819ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 820ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 821f09360d2SPravin Jagtap; IR-ITERATIVE: ComputeEnd: 822ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 823ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 824c931f2e6SPravin Jagtap; 825c931f2e6SPravin Jagtap; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( 826f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 827ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 828f09360d2SPravin Jagtap; IR-DPP: 2: 829f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 830f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 831f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 832f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 833f09360d2SPravin Jagtap; IR-DPP-NEXT: 
[[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 834f09360d2SPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 8352a960716SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 8362a960716SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 8372a960716SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8382a960716SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 8392a960716SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8402a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 8412a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8422a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 8432a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8442a960716SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 
-0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 8452a960716SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8462a960716SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 8472a960716SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 8482a960716SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 8492a960716SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 8502a960716SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 8512a960716SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 8522a960716SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 8532a960716SVikram Hegde; IR-DPP: 26: 8542a960716SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4 8552a960716SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 8562a960716SVikram Hegde; IR-DPP: 28: 8572a960716SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 8582a960716SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 8592a960716SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 8602a960716SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call float 
@llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 861ae63db78SJay Foad; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 862ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP34]] 863ae63db78SJay Foad; IR-DPP: 34: 864ae63db78SJay Foad; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 865ae63db78SJay Foad; IR-DPP-NEXT: ret float [[TMP35]] 866c931f2e6SPravin Jagtap; 867c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 868c931f2e6SPravin Jagtap ret float %result 869c931f2e6SPravin Jagtap} 870c931f2e6SPravin Jagtap 871c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 { 87208701e35SPravin Jagtap; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe( 87308701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 87408701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 875c931f2e6SPravin Jagtap; 876c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 877c931f2e6SPravin Jagtap ret float %result 878c931f2e6SPravin Jagtap} 879c931f2e6SPravin Jagtap 880c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 { 88108701e35SPravin Jagtap; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe( 88208701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 88308701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 884c931f2e6SPravin Jagtap; 885c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) 
%ptr, float %val syncscope("agent") monotonic, align 4 886c931f2e6SPravin Jagtap ret float %result 887c931f2e6SPravin Jagtap} 888c931f2e6SPravin Jagtap 889e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1 { 890e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp( 89108701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4 89208701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 893c931f2e6SPravin Jagtap; 894c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 895c931f2e6SPravin Jagtap ret float %result 896c931f2e6SPravin Jagtap} 897c931f2e6SPravin Jagtap 898e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1 { 899e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp( 90008701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4 90108701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 902c931f2e6SPravin Jagtap; 903c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 904c931f2e6SPravin Jagtap ret float %result 905c931f2e6SPravin Jagtap} 906c931f2e6SPravin Jagtap 907c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 { 90808701e35SPravin Jagtap; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp( 90908701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") 
monotonic, align 4 91008701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 911c931f2e6SPravin Jagtap; 912c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 913c931f2e6SPravin Jagtap ret float %result 914c931f2e6SPravin Jagtap} 915c931f2e6SPravin Jagtap 916c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 { 91708701e35SPravin Jagtap; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp( 91808701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 91908701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 920c931f2e6SPravin Jagtap; 921c931f2e6SPravin Jagtap %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 922c931f2e6SPravin Jagtap ret float %result 923c931f2e6SPravin Jagtap} 924c931f2e6SPravin Jagtap 925c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 { 92608701e35SPravin Jagtap; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope( 92708701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 92808701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 929c931f2e6SPravin Jagtap; 930c931f2e6SPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 931c931f2e6SPravin Jagtap ret float %result 932c931f2e6SPravin Jagtap} 933c931f2e6SPravin Jagtap 934c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 { 93508701e35SPravin Jagtap; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope( 93608701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = 
atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 93708701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 938c931f2e6SPravin Jagtap; 939c931f2e6SPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 940c931f2e6SPravin Jagtap ret float %result 941c931f2e6SPravin Jagtap} 942c931f2e6SPravin Jagtap 943e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1{ 944e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp( 94508701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 94608701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 947c931f2e6SPravin Jagtap; 948c931f2e6SPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 949c931f2e6SPravin Jagtap ret float %result 950c931f2e6SPravin Jagtap} 951c931f2e6SPravin Jagtap 952e96948a6SMatt Arsenaultdefine amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1{ 953e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp( 95408701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 95508701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 956c931f2e6SPravin Jagtap; 957c931f2e6SPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 958c931f2e6SPravin Jagtap ret float %result 959c931f2e6SPravin Jagtap} 960c931f2e6SPravin Jagtap 961c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 { 
96208701e35SPravin Jagtap; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp( 96308701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4 96408701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 965c931f2e6SPravin Jagtap; 966c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 967c931f2e6SPravin Jagtap ret float %result 968c931f2e6SPravin Jagtap} 969c931f2e6SPravin Jagtap 970c931f2e6SPravin Jagtapdefine amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 { 97108701e35SPravin Jagtap; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp( 97208701e35SPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4 97308701e35SPravin Jagtap; IR-NEXT: ret float [[RESULT]] 974c931f2e6SPravin Jagtap; 975c931f2e6SPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 976c931f2e6SPravin Jagtap ret float %result 977c931f2e6SPravin Jagtap} 978c931f2e6SPravin Jagtap 979e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 { 980e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe( 981ae63db78SJay Foad; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 982ae63db78SJay Foad; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 983ae63db78SJay Foad; IR: 2: 984ae63db78SJay Foad; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 985ae63db78SJay Foad; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 986ae63db78SJay Foad; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 987ae63db78SJay Foad; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 988ae63db78SJay Foad; IR-NEXT: 
[[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 989ae63db78SJay Foad; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 990ae63db78SJay Foad; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) 991ae63db78SJay Foad; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 992ae63db78SJay Foad; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to double 993ae63db78SJay Foad; IR-NEXT: [[TMP12:%.*]] = fmul double [[VAL:%.*]], [[TMP11]] 994ae63db78SJay Foad; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 995ae63db78SJay Foad; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 996ae63db78SJay Foad; IR: 14: 997ae63db78SJay Foad; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 4 998ae63db78SJay Foad; IR-NEXT: br label [[TMP16]] 999ae63db78SJay Foad; IR: 16: 1000ae63db78SJay Foad; IR-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1001ae63db78SJay Foad; IR-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) 1002ae63db78SJay Foad; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to double 1003ae63db78SJay Foad; IR-NEXT: [[TMP20:%.*]] = fmul double [[VAL]], [[TMP19]] 1004ae63db78SJay Foad; IR-NEXT: [[TMP21:%.*]] = fadd double [[TMP18]], [[TMP20]] 1005ae63db78SJay Foad; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1006ae63db78SJay Foad; IR-NEXT: br label [[TMP23]] 1007ae63db78SJay Foad; IR: 23: 1008ae63db78SJay Foad; IR-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1009ae63db78SJay Foad; IR-NEXT: ret double [[TMP24]] 1010e1a8120aSPravin Jagtap; 1011e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1012e1a8120aSPravin Jagtap ret double %result 1013e1a8120aSPravin Jagtap} 1014e1a8120aSPravin Jagtap 1015e1a8120aSPravin Jagtapdefine 
amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 { 1016cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe( 1017cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1018cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1019cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1020cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1021cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1022cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1023cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1024cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1025cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1026cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1027cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1028cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1029cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 4 1030cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1031cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1032cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1033cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 1034cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[TMP22:%.*]] 1035cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: 
[[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1036cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 1037cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1038cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1039cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1040cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1041cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1042cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1043cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1044cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 1045cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1046cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) 1047cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) 1048cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = fadd double [[ACCUMULATOR]], [[TMP21]] 1049cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1050cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1051cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1052cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1053cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1054cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1055cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP28]] = 
icmp eq i32 [[TMP8]], 0 1056cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1057cf230e77SVikram Hegde; 1058cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe( 1059cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1060cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1061cf230e77SVikram Hegde; IR-DPP: 2: 1062cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1063cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1064cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1065cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1066cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1067cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1068cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) 1069cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) 1070cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]] 1071cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) 1072cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = fadd double [[TMP11]], [[TMP12]] 1073cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) 1074cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = fadd double [[TMP13]], [[TMP14]] 1075cf230e77SVikram Hegde; IR-DPP-NEXT: 
[[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) 1076cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = fadd double [[TMP15]], [[TMP16]] 1077cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) 1078cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = fadd double [[TMP17]], [[TMP18]] 1079cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) 1080cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = fadd double [[TMP19]], [[TMP20]] 1081cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) 1082cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) 1083cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) 1084cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1085cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1086cf230e77SVikram Hegde; IR-DPP: 26: 1087cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 4 1088cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1089cf230e77SVikram Hegde; IR-DPP: 28: 1090cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1091cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) 1092cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) 1093cf230e77SVikram Hegde; 
IR-DPP-NEXT: [[TMP32:%.*]] = fadd double [[TMP30]], [[TMP31]] 1094cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1095cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 1096cf230e77SVikram Hegde; IR-DPP: 34: 1097cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1098cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1099e1a8120aSPravin Jagtap; 1100e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1101e1a8120aSPravin Jagtap ret double %result 1102e1a8120aSPravin Jagtap} 1103e1a8120aSPravin Jagtap 1104e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 { 1105ae63db78SJay Foad; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( 1106ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1107ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1108ae63db78SJay Foad; IR-ITERATIVE: 2: 1109ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1110ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1111ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1112ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1113ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1114ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1115ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1116ae63db78SJay Foad; IR-ITERATIVE-NEXT: 
[[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1117ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1118ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1119ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1120ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1121ae63db78SJay Foad; IR-ITERATIVE: 14: 1122ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8 1123ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP16]] 1124ae63db78SJay Foad; IR-ITERATIVE: 16: 1125ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1126ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1127ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1128ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1129ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1130ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1131ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 1132ae63db78SJay 
Foad; IR-ITERATIVE: 23: 1133ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1134ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1135ae63db78SJay Foad; 1136ae63db78SJay Foad; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( 1137ae63db78SJay Foad; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1138ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1139ae63db78SJay Foad; IR-DPP: 2: 1140ae63db78SJay Foad; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1141ae63db78SJay Foad; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1142ae63db78SJay Foad; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1143ae63db78SJay Foad; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1144ae63db78SJay Foad; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1145ae63db78SJay Foad; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1146ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1147ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1148ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1149ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1150ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1151ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1152ae63db78SJay Foad; IR-DPP: 14: 1153ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) 
[[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8 1154ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 1155ae63db78SJay Foad; IR-DPP: 16: 1156ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1157ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1158ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1159ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1160ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1161ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1162ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 1163ae63db78SJay Foad; IR-DPP: 23: 1164ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1165ae63db78SJay Foad; IR-DPP-NEXT: ret double [[TMP24]] 1166e1a8120aSPravin Jagtap; 1167e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1168e1a8120aSPravin Jagtap ret double %result 1169e1a8120aSPravin Jagtap} 1170e1a8120aSPravin Jagtap 1171e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 { 1172cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( 1173cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 
@llvm.amdgcn.ps.live() #[[ATTR7]] 1174cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1175cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1176cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1177cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1178cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1179cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1180cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1181cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1182cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1183cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1184cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1185cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("one-as") monotonic, align 8 1186cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1187cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1188cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1189cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1190cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1191cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1192cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 
1193cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1194cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1195cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1196cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1197cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1198cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1199cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1200cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1201cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1202cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1203cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1204cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1205cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1206cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1207cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1208cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1209cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1210cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1211cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: 
[[TMP28]] = icmp eq i32 [[TMP8]], 0 1212cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1213cf230e77SVikram Hegde; 1214cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( 1215cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1216cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1217cf230e77SVikram Hegde; IR-DPP: 2: 1218cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1219cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1220cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1221cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1222cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1223cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1224cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1225cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1226cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1227cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1228cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", 
metadata !"fpexcept.strict") #[[ATTR8]] 1229cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1230cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1231cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1232cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1233cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1234cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1235cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1236cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1237cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1238cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1239cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double 
@llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1240cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1241cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1242cf230e77SVikram Hegde; IR-DPP: 26: 1243cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("one-as") monotonic, align 8 1244cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1245cf230e77SVikram Hegde; IR-DPP: 28: 1246cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1247cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1248cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1249cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1250cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1251cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 1252cf230e77SVikram Hegde; IR-DPP: 34: 1253cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1254cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1255e1a8120aSPravin Jagtap; 1256e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1257e1a8120aSPravin Jagtap ret double %result 1258e1a8120aSPravin Jagtap} 1259e1a8120aSPravin Jagtap 1260e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { 1261ae63db78SJay Foad; IR-ITERATIVE-LABEL: 
@global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( 1262ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1263ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1264ae63db78SJay Foad; IR-ITERATIVE: 2: 1265ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1266ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1267ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1268ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1269ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1270ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1271ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1272ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1273ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1274ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1275ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1276ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1277ae63db78SJay Foad; IR-ITERATIVE: 14: 1278ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8 1279ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP16]] 1280ae63db78SJay Foad; IR-ITERATIVE: 16: 
1281ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1282ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1283ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1284ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1285ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1286ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1287ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 1288ae63db78SJay Foad; IR-ITERATIVE: 23: 1289ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1290ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1291ae63db78SJay Foad; 1292ae63db78SJay Foad; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( 1293ae63db78SJay Foad; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1294ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1295ae63db78SJay Foad; IR-DPP: 2: 1296ae63db78SJay Foad; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1297ae63db78SJay Foad; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1298ae63db78SJay Foad; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1299ae63db78SJay Foad; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1300ae63db78SJay Foad; IR-DPP-NEXT: [[TMP7:%.*]] = 
call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1301ae63db78SJay Foad; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1302ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1303ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1304ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1305ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1306ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1307ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1308ae63db78SJay Foad; IR-DPP: 14: 1309ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8 1310ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 1311ae63db78SJay Foad; IR-DPP: 16: 1312ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1313ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1314ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1315ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1316ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata 
!"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1317ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1318ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 1319ae63db78SJay Foad; IR-DPP: 23: 1320ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1321ae63db78SJay Foad; IR-DPP-NEXT: ret double [[TMP24]] 1322e1a8120aSPravin Jagtap; 1323e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1324e1a8120aSPravin Jagtap ret double %result 1325e1a8120aSPravin Jagtap} 1326e1a8120aSPravin Jagtap 1327e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { 1328cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( 1329cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1330cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1331cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1332cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1333cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1334cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1335cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1336cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1337cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1338cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1339cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br 
label [[COMPUTELOOP:%.*]] 1340cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1341cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1342cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1343cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1344cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1345cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1346cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1347cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1348cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 1349cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1350cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1351cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1352cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1353cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1354cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1355cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1356cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1357cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1358cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = 
call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1359cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1360cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1361cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1362cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1363cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1364cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1365cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1366cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1367cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1368cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1369cf230e77SVikram Hegde; 1370cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( 1371cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1372cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1373cf230e77SVikram Hegde; IR-DPP: 2: 1374cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1375cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1376cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1377cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1378cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 
1379cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1380cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1381cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1382cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1383cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1384cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1385cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1386cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1387cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1388cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1389cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, 
i32 15, i1 false) #[[ATTR8]] 1390cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1391cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1392cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1393cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1394cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1395cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1396cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1397cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1398cf230e77SVikram Hegde; IR-DPP: 26: 1399cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1400cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1401cf230e77SVikram Hegde; IR-DPP: 28: 1402cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1403cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1404cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1405cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call double 
@llvm.experimental.constrained.fsub.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1406cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1407cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 1408cf230e77SVikram Hegde; IR-DPP: 34: 1409cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1410cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1411e1a8120aSPravin Jagtap; 1412e1a8120aSPravin Jagtap %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1413e1a8120aSPravin Jagtap ret double %result 1414e1a8120aSPravin Jagtap} 1415e1a8120aSPravin Jagtap 1416e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 { 1417e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe( 1418e1a8120aSPravin Jagtap; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1419ae63db78SJay Foad; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1420e1a8120aSPravin Jagtap; IR: 2: 1421e1a8120aSPravin Jagtap; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1422e1a8120aSPravin Jagtap; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1423e1a8120aSPravin Jagtap; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1424e1a8120aSPravin Jagtap; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1425e1a8120aSPravin Jagtap; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1426e1a8120aSPravin Jagtap; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1427e1a8120aSPravin Jagtap; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1428e1a8120aSPravin Jagtap; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1429e1a8120aSPravin Jagtap; 
IR: 10: 1430e1a8120aSPravin Jagtap; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1431e1a8120aSPravin Jagtap; IR-NEXT: br label [[TMP12]] 1432e1a8120aSPravin Jagtap; IR: 12: 1433e1a8120aSPravin Jagtap; IR-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 14342a960716SVikram Hegde; IR-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 14352a960716SVikram Hegde; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to double 14362a960716SVikram Hegde; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 14372a960716SVikram Hegde; IR-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP16]]) 1438ae63db78SJay Foad; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1439ae63db78SJay Foad; IR-NEXT: br label [[TMP19]] 1440ae63db78SJay Foad; IR: 19: 1441ae63db78SJay Foad; IR-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1442ae63db78SJay Foad; IR-NEXT: ret double [[TMP20]] 1443e1a8120aSPravin Jagtap; 1444e1a8120aSPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1445e1a8120aSPravin Jagtap ret double %result 1446e1a8120aSPravin Jagtap} 1447e1a8120aSPravin Jagtap 1448e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 { 1449cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe( 1450cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1451cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1452cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1453cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 
@llvm.amdgcn.ballot.i64(i1 true) 1454cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1455cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1456cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1457cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1458cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1459cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1460cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1461cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1462cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1463cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1464cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1465cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1466cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 1467cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP22:%.*]]) 1468cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1469cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 1470cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1471cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1472cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1473cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1474cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, 
[[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1475cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1476cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1477cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 1478cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1479cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) 1480cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) 1481cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.minnum.f64(double [[ACCUMULATOR]], double [[TMP21]]) 1482cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1483cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1484cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1485cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1486cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1487cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1488cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1489cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1490cf230e77SVikram Hegde; 1491cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe( 1492cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1493cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1494cf230e77SVikram Hegde; IR-DPP: 2: 1495cf230e77SVikram 
Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1496cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1497cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1498cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1499cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1500cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1501cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) 1502cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) 1503cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.minnum.f64(double [[TMP9]], double [[TMP10]]) 1504cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) 1505cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.minnum.f64(double [[TMP11]], double [[TMP12]]) 1506cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) 1507cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP13]], double [[TMP14]]) 1508cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) 1509cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP15]], double [[TMP16]]) 1510cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double 
[[TMP17]], i32 322, i32 10, i32 15, i1 false) 1511cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.minnum.f64(double [[TMP17]], double [[TMP18]]) 1512cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) 1513cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.minnum.f64(double [[TMP19]], double [[TMP20]]) 1514cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) 1515cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) 1516cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) 1517cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1518cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1519cf230e77SVikram Hegde; IR-DPP: 26: 1520cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1521cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1522cf230e77SVikram Hegde; IR-DPP: 28: 1523cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1524cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) 1525cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) 1526cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.minnum.f64(double [[TMP30]], double [[TMP31]]) 1527cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1528cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 
1529cf230e77SVikram Hegde; IR-DPP: 34: 1530cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1531cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1532e1a8120aSPravin Jagtap; 1533e1a8120aSPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1534e1a8120aSPravin Jagtap ret double %result 1535e1a8120aSPravin Jagtap} 1536e1a8120aSPravin Jagtap 1537e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{ 1538e96948a6SMatt Arsenault; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( 1539e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1540ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1541e1a8120aSPravin Jagtap; IR-ITERATIVE: 2: 1542e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1543e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1544e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1545e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1546e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1547e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1548e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1549e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1550e1a8120aSPravin Jagtap; IR-ITERATIVE: 10: 1551e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], 
double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1552e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: br label [[TMP12]] 1553e1a8120aSPravin Jagtap; IR-ITERATIVE: 12: 1554e1a8120aSPravin Jagtap; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 15552a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 15562a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 15572a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 15582a960716SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] 1559ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1560ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP19]] 1561ae63db78SJay Foad; IR-ITERATIVE: 19: 1562ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1563ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret double [[TMP20]] 1564e1a8120aSPravin Jagtap; 1565e96948a6SMatt Arsenault; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( 1566e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1567ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1568e1a8120aSPravin Jagtap; IR-DPP: 2: 1569e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1570e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1571e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 
1572e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1573e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1574e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1575e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1576e1a8120aSPravin Jagtap; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1577e1a8120aSPravin Jagtap; IR-DPP: 10: 1578e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1579e1a8120aSPravin Jagtap; IR-DPP-NEXT: br label [[TMP12]] 1580e1a8120aSPravin Jagtap; IR-DPP: 12: 1581e1a8120aSPravin Jagtap; IR-DPP-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 15822a960716SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]] 15832a960716SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 15842a960716SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 15852a960716SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 1586ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1587ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP19]] 1588ae63db78SJay Foad; IR-DPP: 19: 1589ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1590ae63db78SJay Foad; IR-DPP-NEXT: ret double [[TMP20]] 1591e1a8120aSPravin Jagtap; 1592e1a8120aSPravin Jagtap %result = atomicrmw fmax ptr 
addrspace(1) %ptr, double %val syncscope("agent") monotonic 1593e1a8120aSPravin Jagtap ret double %result 1594e1a8120aSPravin Jagtap} 1595e1a8120aSPravin Jagtap 1596e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{ 1597cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( 1598cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1599cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1600cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1601cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1602cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1603cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1604cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1605cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1606cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1607cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1608cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1609cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1610cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1611cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1612cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1613cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 
1614cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1615cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] 1616cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1617cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 1618cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1619cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1620cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1621cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1622cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1623cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1624cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1625cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1626cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1627cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1628cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1629cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] 1630cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: 
[[TMP24:%.*]] = shl i64 1, [[TMP19]] 1631cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1632cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1633cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1634cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1635cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1636cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1637cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1638cf230e77SVikram Hegde; 1639cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( 1640cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1641cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1642cf230e77SVikram Hegde; IR-DPP: 2: 1643cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1644cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1645cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1646cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1647cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1648cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1649cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]] 1650cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1651cf230e77SVikram Hegde; IR-DPP-NEXT: 
[[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] 1652cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1653cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] 1654cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1655cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] 1656cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1657cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 1658cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1659cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] 1660cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1661cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") 
#[[ATTR8]] 1662cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1663cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1664cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1665cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1666cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1667cf230e77SVikram Hegde; IR-DPP: 26: 1668cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1669cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1670cf230e77SVikram Hegde; IR-DPP: 28: 1671cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1672cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1673cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1674cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP30]], double [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] 1675cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1676cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 1677cf230e77SVikram Hegde; IR-DPP: 34: 1678cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1679cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1680e1a8120aSPravin Jagtap; 1681e1a8120aSPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val 
syncscope("agent") monotonic 1682e1a8120aSPravin Jagtap ret double %result 1683e1a8120aSPravin Jagtap} 1684e1a8120aSPravin Jagtap 1685e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { 1686ae63db78SJay Foad; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( 1687ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1688ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1689ae63db78SJay Foad; IR-ITERATIVE: 2: 1690ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1691ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1692ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1693ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1694ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1695ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1696ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1697ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1698ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1699ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1700ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1701ae63db78SJay Foad; IR-ITERATIVE-NEXT: br i1 
[[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1702ae63db78SJay Foad; IR-ITERATIVE: 14: 1703ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4 1704ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP16]] 1705ae63db78SJay Foad; IR-ITERATIVE: 16: 1706ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1707ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1708ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1709ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1710ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1711ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1712ae63db78SJay Foad; IR-ITERATIVE-NEXT: br label [[TMP23]] 1713ae63db78SJay Foad; IR-ITERATIVE: 23: 1714ae63db78SJay Foad; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1715ae63db78SJay Foad; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1716ae63db78SJay Foad; 1717ae63db78SJay Foad; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( 1718ae63db78SJay Foad; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1719ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1720ae63db78SJay Foad; IR-DPP: 2: 1721ae63db78SJay Foad; IR-DPP-NEXT: [[TMP3:%.*]] 
= call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1722ae63db78SJay Foad; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1723ae63db78SJay Foad; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1724ae63db78SJay Foad; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1725ae63db78SJay Foad; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1726ae63db78SJay Foad; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1727ae63db78SJay Foad; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1728ae63db78SJay Foad; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1729ae63db78SJay Foad; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1730ae63db78SJay Foad; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1731ae63db78SJay Foad; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1732ae63db78SJay Foad; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1733ae63db78SJay Foad; IR-DPP: 14: 1734ae63db78SJay Foad; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4 1735ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP16]] 1736ae63db78SJay Foad; IR-DPP: 16: 1737ae63db78SJay Foad; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1738ae63db78SJay Foad; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1739ae63db78SJay Foad; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1740ae63db78SJay Foad; IR-DPP-NEXT: [[TMP20:%.*]] 
= call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1741ae63db78SJay Foad; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1742ae63db78SJay Foad; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1743ae63db78SJay Foad; IR-DPP-NEXT: br label [[TMP23]] 1744ae63db78SJay Foad; IR-DPP: 23: 1745ae63db78SJay Foad; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1746ae63db78SJay Foad; IR-DPP-NEXT: ret double [[TMP24]] 1747e1a8120aSPravin Jagtap; 1748e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1749e1a8120aSPravin Jagtap ret double %result 1750e1a8120aSPravin Jagtap} 1751e1a8120aSPravin Jagtap 1752e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { 1753cf230e77SVikram Hegde; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( 1754cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1755cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1756cf230e77SVikram Hegde; IR-ITERATIVE: 2: 1757cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1758cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1759cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1760cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1761cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 
1762cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1763cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1764cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1765cf230e77SVikram Hegde; IR-ITERATIVE: 10: 1766cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] monotonic, align 4 1767cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1768cf230e77SVikram Hegde; IR-ITERATIVE: 12: 1769cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1770cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1771cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1772cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1773cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br label [[TMP17]] 1774cf230e77SVikram Hegde; IR-ITERATIVE: 17: 1775cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1776cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1777cf230e77SVikram Hegde; IR-ITERATIVE: ComputeLoop: 1778cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1779cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1780cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 
1781cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1782cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1783cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1784cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1785cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1786cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1787cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1788cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1789cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1790cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1791cf230e77SVikram Hegde; IR-ITERATIVE: ComputeEnd: 1792cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1793cf230e77SVikram Hegde; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1794cf230e77SVikram Hegde; 1795cf230e77SVikram Hegde; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( 1796cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1797cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1798cf230e77SVikram Hegde; IR-DPP: 2: 1799cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1800cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 
1801cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1802cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1803cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1804cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1805cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1806cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1807cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1808cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1809cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1810cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1811cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1812cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1813cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP17:%.*]] = call double 
@llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1814cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1815cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1816cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1817cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1818cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1819cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1820cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1821cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1822cf230e77SVikram Hegde; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1823cf230e77SVikram Hegde; IR-DPP: 26: 1824cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] monotonic, align 4 1825cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP28]] 1826cf230e77SVikram Hegde; IR-DPP: 28: 1827cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1828cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP30:%.*]] = call 
double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1829cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1830cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1831cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1832cf230e77SVikram Hegde; IR-DPP-NEXT: br label [[TMP34]] 1833cf230e77SVikram Hegde; IR-DPP: 34: 1834cf230e77SVikram Hegde; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1835cf230e77SVikram Hegde; IR-DPP-NEXT: ret double [[TMP35]] 1836e1a8120aSPravin Jagtap; 1837e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1838e1a8120aSPravin Jagtap ret double %result 1839e1a8120aSPravin Jagtap} 1840e1a8120aSPravin Jagtap 1841e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double inreg %val) #0 { 1842e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe( 1843e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4 1844e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1845e1a8120aSPravin Jagtap; 1846e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1847e1a8120aSPravin Jagtap ret double %result 1848e1a8120aSPravin Jagtap} 1849e1a8120aSPravin Jagtap 1850e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double %val) #0 { 1851e1a8120aSPravin Jagtap; IR-LABEL: 
@global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe( 1852e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4 1853e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1854e1a8120aSPravin Jagtap; 1855e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1856e1a8120aSPravin Jagtap ret double %result 1857e1a8120aSPravin Jagtap} 1858e1a8120aSPravin Jagtap 1859e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1 { 1860e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp( 1861e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8 1862e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1863e1a8120aSPravin Jagtap; 1864e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1865e1a8120aSPravin Jagtap ret double %result 1866e1a8120aSPravin Jagtap} 1867e1a8120aSPravin Jagtap 1868e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1 { 1869e96948a6SMatt Arsenault; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp( 1870e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8 1871e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1872e1a8120aSPravin Jagtap; 1873e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1874e1a8120aSPravin Jagtap ret double %result 
}

; fsub case for the div_address / uni_value combination (%ptr is not inreg,
; %val is inreg). The instruction must be fsub to match the test name and its
; div_value sibling below; the checks expect the atomicrmw to come out of the
; pass unchanged, with only the default alignment printed explicitly.
define amdgpu_ps double @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 {
; IR-LABEL: @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(
; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT:    ret double [[RESULT]]
;
  %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
  ret double %result
}

define amdgpu_ps double @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 {
; IR-LABEL: @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(
; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT:    ret double [[RESULT]]
;
  %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
  ret double %result
}

define amdgpu_ps double @global_atomic_fmin_double_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, double inreg %val) #0 {
; IR-LABEL: @global_atomic_fmin_double_div_address_uni_value_agent_scope(
; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT:    ret double [[RESULT]]
1899e1a8120aSPravin Jagtap; 1900e1a8120aSPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1901e1a8120aSPravin Jagtap ret double %result 1902e1a8120aSPravin Jagtap} 1903e1a8120aSPravin Jagtap 1904e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fmin_double_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, double %val) #0 { 1905e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fmin_double_div_address_div_value_agent_scope( 1906e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1907e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1908e1a8120aSPravin Jagtap; 1909e1a8120aSPravin Jagtap %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1910e1a8120aSPravin Jagtap ret double %result 1911e1a8120aSPravin Jagtap} 1912e1a8120aSPravin Jagtap 1913e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1{ 1914e96948a6SMatt Arsenault; IR-LABEL: @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp( 1915e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1916e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1917e1a8120aSPravin Jagtap; 1918e1a8120aSPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1919e1a8120aSPravin Jagtap ret double %result 1920e1a8120aSPravin Jagtap} 1921e1a8120aSPravin Jagtap 1922e96948a6SMatt Arsenaultdefine amdgpu_ps double @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1{ 1923e96948a6SMatt Arsenault; IR-LABEL: @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp( 
1924e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1925e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1926e1a8120aSPravin Jagtap; 1927e1a8120aSPravin Jagtap %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1928e1a8120aSPravin Jagtap ret double %result 1929e1a8120aSPravin Jagtap} 1930e1a8120aSPravin Jagtap 1931e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 { 1932e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp( 1933e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4 1934e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1935e1a8120aSPravin Jagtap; 1936e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1937e1a8120aSPravin Jagtap ret double %result 1938e1a8120aSPravin Jagtap} 1939e1a8120aSPravin Jagtap 1940e1a8120aSPravin Jagtapdefine amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 { 1941e1a8120aSPravin Jagtap; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp( 1942e1a8120aSPravin Jagtap; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4 1943e1a8120aSPravin Jagtap; IR-NEXT: ret double [[RESULT]] 1944e1a8120aSPravin Jagtap; 1945e1a8120aSPravin Jagtap %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1946e1a8120aSPravin Jagtap ret double %result 1947e1a8120aSPravin Jagtap} 1948e1a8120aSPravin Jagtap 1949*dfda9c5bSMatt Arsenaultattributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 1950*dfda9c5bSMatt 
Arsenaultattributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 1951c931f2e6SPravin Jagtapattributes #2 = { strictfp } 1952