1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s 3; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s 4 5; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations. 6; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction 7; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes. 8 9define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { 10; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe( 11; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 12; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 13; IR: 2: 14; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 15; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 16; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 17; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 18; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 19; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 20; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) 21; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 22; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float 23; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]] 24; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 25; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 26; IR: 14: 27; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 28; IR-NEXT: br label [[TMP16]] 29; IR: 16: 30; IR-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 31; IR-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) 32; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to float 33; IR-NEXT: [[TMP20:%.*]] = fmul float [[VAL]], [[TMP19]] 34; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP18]], [[TMP20]] 35; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 36; IR-NEXT: br label [[TMP23]] 37; IR: 23: 38; IR-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 39; IR-NEXT: ret float [[TMP24]] 40; 41 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 42 ret float %result 43} 44 45define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { 46; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( 47; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 48; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 49; IR-ITERATIVE: 2: 50; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 51; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 52; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 53; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 54; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 55; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 56; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 57; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 58; IR-ITERATIVE: 10: 59; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 60; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 61; IR-ITERATIVE: 12: 62; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 63; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 64; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP22:%.*]] 65; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 66; IR-ITERATIVE-NEXT: br label [[TMP17]] 67; IR-ITERATIVE: 17: 68; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 69; IR-ITERATIVE-NEXT: ret float [[TMP18]] 70; IR-ITERATIVE: ComputeLoop: 71; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 72; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 73; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 74; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 75; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 76; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) 77; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) 78; IR-ITERATIVE-NEXT: [[TMP23]] = fadd float [[ACCUMULATOR]], [[TMP21]] 79; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 80; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 81; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 82; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 83; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 84; IR-ITERATIVE: ComputeEnd: 85; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 86; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 87; 88; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( 89; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 90; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 91; IR-DPP: 2: 92; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 93; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 94; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 95; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 96; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 97; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 98; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) 99; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) 100; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] 101; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) 102; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] 103; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) 104; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] 105; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) 106; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] 107; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) 108; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] 109; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) 110; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]] 111; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) 112; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) 113; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) 114; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 115; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 116; IR-DPP: 26: 117; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 118; IR-DPP-NEXT: br label [[TMP28]] 119; IR-DPP: 28: 120; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 121; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) 122; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) 123; IR-DPP-NEXT: [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]] 124; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 125; IR-DPP-NEXT: br label [[TMP34]] 126; IR-DPP: 34: 127; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 128; IR-DPP-NEXT: ret float [[TMP35]] 129; 130 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 131 ret float %result 132} 133 134define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 { 135; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( 136; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]] 137; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 138; IR-ITERATIVE: 2: 139; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 140; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 141; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 142; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 143; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 144; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 145; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 146; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 147; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 148; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 149; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 150; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 151; IR-ITERATIVE: 14: 152; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4 153; IR-ITERATIVE-NEXT: br label [[TMP16]] 154; IR-ITERATIVE: 16: 155; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 156; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 157; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 158; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 159; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 160; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 161; IR-ITERATIVE-NEXT: br label [[TMP23]] 162; IR-ITERATIVE: 23: 163; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 164; IR-ITERATIVE-NEXT: ret float [[TMP24]] 165; 166; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( 167; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]] 168; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 169; IR-DPP: 2: 170; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 171; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 172; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 173; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 174; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 175; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 176; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 177; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 178; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 179; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 180; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 181; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 182; IR-DPP: 14: 183; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4 184; IR-DPP-NEXT: br label [[TMP16]] 185; IR-DPP: 16: 186; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 187; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 188; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 189; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 190; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 191; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 192; IR-DPP-NEXT: br label [[TMP23]] 193; IR-DPP: 23: 194; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 195; IR-DPP-NEXT: ret float [[TMP24]] 196; 197 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 198 ret float %result 199} 200 201define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 { 202; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( 203; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 204; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 205; IR-ITERATIVE: 2: 206; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 207; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 208; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 209; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 210; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 211; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 212; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 213; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 214; IR-ITERATIVE: 10: 215; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("one-as") monotonic, align 4 216; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 217; IR-ITERATIVE: 12: 218; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 219; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 220; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 221; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 222; IR-ITERATIVE-NEXT: br label [[TMP17]] 223; IR-ITERATIVE: 17: 224; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 225; IR-ITERATIVE-NEXT: ret float [[TMP18]] 226; IR-ITERATIVE: ComputeLoop: 227; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 228; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 229; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 230; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 231; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 232; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 233; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 234; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 235; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 236; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 237; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 238; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 239; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 240; IR-ITERATIVE: ComputeEnd: 241; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 242; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 243; 244; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( 245; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 246; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 247; IR-DPP: 2: 248; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 249; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 250; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 251; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 252; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 253; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 254; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 255; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 256; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 257; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 258; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 259; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 260; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 261; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 262; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 263; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 264; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 265; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 266; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 267; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 268; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 269; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 270; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 271; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 272; IR-DPP: 26: 273; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4 274; IR-DPP-NEXT: br label [[TMP28]] 275; IR-DPP: 28: 276; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 277; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 278; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 279; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 280; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 281; IR-DPP-NEXT: br label [[TMP34]] 282; IR-DPP: 34: 283; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 284; IR-DPP-NEXT: ret float [[TMP35]] 285; 286 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 287 ret float %result 288} 289 290define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { 291; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( 292; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 293; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 294; IR-ITERATIVE: 2: 295; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 296; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 297; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 298; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 299; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 300; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 301; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 302; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 303; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 304; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 305; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 306; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 307; IR-ITERATIVE: 14: 308; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 309; IR-ITERATIVE-NEXT: br label [[TMP16]] 310; IR-ITERATIVE: 16: 311; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 312; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 313; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 314; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 315; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 316; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 317; IR-ITERATIVE-NEXT: br label [[TMP23]] 318; IR-ITERATIVE: 23: 319; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 320; IR-ITERATIVE-NEXT: ret float [[TMP24]] 321; 322; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( 323; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 324; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 325; IR-DPP: 2: 326; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 327; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 328; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 329; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 330; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 331; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 332; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 333; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 334; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 335; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 336; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 337; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 338; IR-DPP: 14: 339; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4 340; IR-DPP-NEXT: br label [[TMP16]] 341; IR-DPP: 16: 342; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 343; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 344; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 345; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 346; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 347; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 348; IR-DPP-NEXT: br label [[TMP23]] 349; IR-DPP: 23: 350; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 351; IR-DPP-NEXT: ret float [[TMP24]] 352; 353 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 354 ret float %result 355} 356 357define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { 358; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( 359; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 360; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 361; IR-ITERATIVE: 2: 362; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 363; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 364; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 365; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 366; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 367; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 368; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 369; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 370; IR-ITERATIVE: 10: 371; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 372; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 373; IR-ITERATIVE: 12: 374; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 375; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 376; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 377; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 378; IR-ITERATIVE-NEXT: br label [[TMP17]] 379; IR-ITERATIVE: 17: 380; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 381; IR-ITERATIVE-NEXT: ret float [[TMP18]] 382; IR-ITERATIVE: ComputeLoop: 383; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 384; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 385; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 386; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 387; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 388; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 389; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 390; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 391; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 392; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 393; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 394; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 395; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 396; IR-ITERATIVE: ComputeEnd: 397; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 398; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 399; 400; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( 401; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 402; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 403; IR-DPP: 2: 404; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 405; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 406; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 407; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 408; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 409; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 410; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 411; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 412; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 413; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 414; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 415; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 416; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 417; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 418; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 419; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 420; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 421; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 422; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 423; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 424; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 425; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 426; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 427; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 428; IR-DPP: 26: 429; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 430; IR-DPP-NEXT: br label [[TMP28]] 431; IR-DPP: 28: 432; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 433; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 434; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 435; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 436; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 437; IR-DPP-NEXT: br label [[TMP34]] 438; IR-DPP: 34: 439; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 440; IR-DPP-NEXT: ret float [[TMP35]] 441; 442 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 443 ret float %result 444} 445 446define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { 447; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe( 448; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 449; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 450; IR: 2: 451; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 452; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 453; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 454; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 455; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 456; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 457; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 458; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 459; IR: 10: 460; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 461; IR-NEXT: br label [[TMP12]] 462; IR: 12: 463; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 464; IR-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 465; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to float 466; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 467; IR-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP16]]) 468; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 469; IR-NEXT: br label [[TMP19]] 470; IR: 19: 471; IR-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 472; IR-NEXT: ret float [[TMP20]] 473; 474 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 475 ret float %result 476} 477 478define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { 479; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( 480; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 481; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 482; IR-ITERATIVE: 2: 483; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 484; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 485; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 486; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 487; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 488; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 489; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 490; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 491; IR-ITERATIVE: 10: 492; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 493; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 494; IR-ITERATIVE: 12: 495; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 496; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) 497; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP22:%.*]]) 498; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 499; IR-ITERATIVE-NEXT: br label [[TMP17]] 500; IR-ITERATIVE: 17: 501; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 502; IR-ITERATIVE-NEXT: ret float [[TMP18]] 503; IR-ITERATIVE: ComputeLoop: 504; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 505; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 506; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 507; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 508; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 509; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) 510; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) 511; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP21]]) 512; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 513; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 514; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 515; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 516; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 517; IR-ITERATIVE: ComputeEnd: 518; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 519; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 520; 521; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( 522; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 523; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 524; IR-DPP: 2: 525; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 526; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 527; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 528; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 529; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 530; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 531; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) 532; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) 533; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]]) 534; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) 535; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]]) 536; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) 537; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]]) 538; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) 539; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]]) 540; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) 541; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]]) 542; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) 543; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]]) 544; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) 545; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) 546; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) 547; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 548; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 549; IR-DPP: 26: 550; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 551; IR-DPP-NEXT: br label [[TMP28]] 552; IR-DPP: 28: 553; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 554; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) 555; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) 556; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.minnum.f32(float [[TMP30]], float [[TMP31]]) 557; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 558; IR-DPP-NEXT: br label [[TMP34]] 559; IR-DPP: 34: 560; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 561; IR-DPP-NEXT: ret float [[TMP35]] 562; 563 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 564 ret float %result 565} 566 567define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{ 568; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( 569; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 570; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 571; IR-ITERATIVE: 2: 572; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 573; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 574; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 575; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 576; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 577; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 578; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 579; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 580; IR-ITERATIVE: 10: 581; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 582; IR-ITERATIVE-NEXT: br label [[TMP12]] 583; IR-ITERATIVE: 12: 584; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 585; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 586; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 587; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 588; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] 589; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 590; IR-ITERATIVE-NEXT: br label [[TMP19]] 591; IR-ITERATIVE: 19: 592; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 593; IR-ITERATIVE-NEXT: ret float [[TMP20]] 594; 595; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( 596; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 597; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 598; IR-DPP: 2: 599; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 600; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 601; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 602; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 603; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 604; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 605; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 606; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 607; IR-DPP: 10: 608; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 609; IR-DPP-NEXT: br label [[TMP12]] 610; IR-DPP: 12: 611; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 612; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]] 613; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 614; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 615; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 616; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]] 617; IR-DPP-NEXT: br label [[TMP19]] 618; IR-DPP: 19: 619; IR-DPP-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 620; IR-DPP-NEXT: ret float [[TMP20]] 621; 622 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 623 ret float %result 624} 625 626define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{ 627; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( 628; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 629; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 630; IR-ITERATIVE: 2: 631; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 632; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 633; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 634; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 635; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 636; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 637; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 638; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 639; IR-ITERATIVE: 10: 640; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 641; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 642; IR-ITERATIVE: 12: 643; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 644; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 645; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] 646; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 647; IR-ITERATIVE-NEXT: br label [[TMP17]] 648; IR-ITERATIVE: 17: 649; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 650; IR-ITERATIVE-NEXT: ret float [[TMP18]] 651; IR-ITERATIVE: ComputeLoop: 652; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 653; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 654; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 655; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 656; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 657; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 658; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 659; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] 660; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 661; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 662; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 663; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 664; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 665; IR-ITERATIVE: ComputeEnd: 666; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 667; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 668; 669; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( 670; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 671; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 672; IR-DPP: 2: 673; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 674; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 675; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 676; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 677; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 678; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 679; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] 680; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 681; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] 682; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 683; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] 684; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 685; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] 686; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 687; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 688; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 689; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] 690; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 691; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] 692; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 693; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 694; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 695; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 696; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 697; IR-DPP: 26: 698; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 699; IR-DPP-NEXT: br label [[TMP28]] 700; IR-DPP: 28: 701; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 702; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 703; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 704; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] 705; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 706; IR-DPP-NEXT: br label [[TMP34]] 707; IR-DPP: 34: 708; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 709; IR-DPP-NEXT: ret float [[TMP35]] 710; 711 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 712 ret float %result 713} 714 715define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { 716; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( 717; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 718; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 719; IR-ITERATIVE: 2: 720; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 721; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 722; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 723; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 724; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 725; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 726; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 727; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 728; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 729; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 730; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 731; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 732; IR-ITERATIVE: 14: 733; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4 734; IR-ITERATIVE-NEXT: br label [[TMP16]] 735; IR-ITERATIVE: 16: 736; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 737; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] 738; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 739; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 740; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 741; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 742; IR-ITERATIVE-NEXT: br label [[TMP23]] 743; IR-ITERATIVE: 23: 744; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 745; IR-ITERATIVE-NEXT: ret float [[TMP24]] 746; 747; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( 748; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 749; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 750; IR-DPP: 2: 751; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 752; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 753; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 754; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 755; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 756; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 757; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 758; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 759; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 760; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 761; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 762; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 763; IR-DPP: 14: 764; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4 765; IR-DPP-NEXT: br label [[TMP16]] 766; IR-DPP: 16: 767; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 768; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] 769; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 770; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 771; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 772; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]] 773; IR-DPP-NEXT: br label [[TMP23]] 774; IR-DPP: 23: 775; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 776; IR-DPP-NEXT: ret float [[TMP24]] 777; 778 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 779 ret float %result 780} 781 782define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { 783; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( 784; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 785; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 786; IR-ITERATIVE: 2: 787; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 788; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 789; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 790; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 791; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 792; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 793; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 794; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 795; IR-ITERATIVE: 10: 796; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] monotonic, align 4 797; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 798; IR-ITERATIVE: 12: 799; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 800; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] 801; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 802; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] 803; IR-ITERATIVE-NEXT: br label [[TMP17]] 804; IR-ITERATIVE: 17: 805; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 806; IR-ITERATIVE-NEXT: ret float [[TMP18]] 807; IR-ITERATIVE: ComputeLoop: 808; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 809; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 810; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 811; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 812; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 813; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 814; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] 815; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 816; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 817; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 818; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 819; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 820; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 821; IR-ITERATIVE: ComputeEnd: 822; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 823; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 824; 825; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( 826; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 827; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 828; IR-DPP: 2: 829; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 830; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 831; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 832; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 833; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 834; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 835; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] 836; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 837; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 838; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 839; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 840; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 841; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 842; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 843; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 844; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 845; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 846; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 847; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 848; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 849; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] 850; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] 851; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 852; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 853; IR-DPP: 26: 854; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4 855; IR-DPP-NEXT: br label [[TMP28]] 856; IR-DPP: 28: 857; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 858; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] 859; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] 860; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 861; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] 862; IR-DPP-NEXT: br label [[TMP34]] 863; IR-DPP: 34: 864; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 865; IR-DPP-NEXT: ret float [[TMP35]] 866; 867 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 868 ret float %result 869} 870 871define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 { 872; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe( 873; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 874; IR-NEXT: ret float [[RESULT]] 875; 876 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 877 ret float %result 878} 879 880define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 { 881; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe( 882; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 883; IR-NEXT: ret float [[RESULT]] 884; 885 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 886 ret float %result 887} 888 889define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1 { 890; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp( 891; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4 892; IR-NEXT: ret float [[RESULT]] 893; 894 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 895 ret float %result 896} 897 898define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1 { 899; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp( 900; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4 901; IR-NEXT: ret float [[RESULT]] 902; 903 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic 904 ret float %result 905} 906 907define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 { 908; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp( 909; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 910; IR-NEXT: ret float [[RESULT]] 911; 912 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 913 ret float %result 914} 915 916define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 { 917; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp( 918; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 919; IR-NEXT: ret float [[RESULT]] 920; 921 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 922 ret float %result 923} 924 925define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 { 926; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope( 927; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 928; IR-NEXT: ret float [[RESULT]] 929; 930 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 931 ret float %result 932} 933 934define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 { 935; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope( 936; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 937; IR-NEXT: ret float [[RESULT]] 938; 939 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 940 ret float %result 941} 942 943define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1{ 944; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp( 945; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 946; IR-NEXT: ret float [[RESULT]] 947; 948 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 949 ret float %result 950} 951 952define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1{ 953; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp( 954; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 955; IR-NEXT: ret float [[RESULT]] 956; 957 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic 958 ret float %result 959} 960 961define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 { 962; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp( 963; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4 964; IR-NEXT: ret float [[RESULT]] 965; 966 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 967 ret float %result 968} 969 970define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 { 971; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp( 972; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4 973; IR-NEXT: ret float [[RESULT]] 974; 975 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 976 ret float %result 977} 978 979define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 { 980; IR-LABEL: @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe( 981; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 982; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 983; IR: 2: 984; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 985; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 986; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 987; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 988; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 989; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 990; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) 991; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 992; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to double 993; IR-NEXT: [[TMP12:%.*]] = fmul double [[VAL:%.*]], [[TMP11]] 994; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 995; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 996; IR: 14: 997; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 4 998; IR-NEXT: br label [[TMP16]] 999; IR: 16: 1000; IR-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1001; IR-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) 1002; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to double 1003; IR-NEXT: [[TMP20:%.*]] = fmul double [[VAL]], [[TMP19]] 1004; IR-NEXT: [[TMP21:%.*]] = fadd double [[TMP18]], [[TMP20]] 1005; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1006; IR-NEXT: br label [[TMP23]] 1007; IR: 23: 1008; IR-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1009; IR-NEXT: ret double [[TMP24]] 1010; 1011 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1012 ret double %result 1013} 1014 1015define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 { 1016; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe( 1017; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1018; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1019; IR-ITERATIVE: 2: 1020; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1021; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1022; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1023; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1024; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1025; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1026; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1027; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1028; IR-ITERATIVE: 10: 1029; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 4 1030; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1031; IR-ITERATIVE: 12: 1032; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1033; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 1034; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[TMP22:%.*]] 1035; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1036; IR-ITERATIVE-NEXT: br label [[TMP17]] 1037; IR-ITERATIVE: 17: 1038; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1039; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1040; IR-ITERATIVE: ComputeLoop: 1041; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1042; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1043; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1044; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 1045; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1046; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) 1047; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) 1048; IR-ITERATIVE-NEXT: [[TMP23]] = fadd double [[ACCUMULATOR]], [[TMP21]] 1049; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1050; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1051; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1052; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1053; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1054; IR-ITERATIVE: ComputeEnd: 1055; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1056; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1057; 1058; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe( 1059; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1060; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1061; IR-DPP: 2: 1062; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1063; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1064; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1065; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1066; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1067; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1068; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) 1069; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) 1070; IR-DPP-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]] 1071; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) 1072; IR-DPP-NEXT: [[TMP13:%.*]] = fadd double [[TMP11]], [[TMP12]] 1073; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) 1074; IR-DPP-NEXT: [[TMP15:%.*]] = fadd double [[TMP13]], [[TMP14]] 1075; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) 1076; IR-DPP-NEXT: [[TMP17:%.*]] = fadd double [[TMP15]], [[TMP16]] 1077; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) 1078; IR-DPP-NEXT: [[TMP19:%.*]] = fadd double [[TMP17]], [[TMP18]] 1079; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) 1080; IR-DPP-NEXT: [[TMP21:%.*]] = fadd double [[TMP19]], [[TMP20]] 1081; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) 1082; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) 1083; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) 1084; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1085; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1086; IR-DPP: 26: 1087; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 4 1088; IR-DPP-NEXT: br label [[TMP28]] 1089; IR-DPP: 28: 1090; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1091; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) 1092; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) 1093; IR-DPP-NEXT: [[TMP32:%.*]] = fadd double [[TMP30]], [[TMP31]] 1094; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1095; IR-DPP-NEXT: br label [[TMP34]] 1096; IR-DPP: 34: 1097; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1098; IR-DPP-NEXT: ret double [[TMP35]] 1099; 1100 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1101 ret double %result 1102} 1103 1104define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 { 1105; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( 1106; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1107; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1108; IR-ITERATIVE: 2: 1109; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1110; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1111; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1112; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1113; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1114; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1115; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1116; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1117; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1118; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1119; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1120; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1121; IR-ITERATIVE: 14: 1122; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8 1123; IR-ITERATIVE-NEXT: br label [[TMP16]] 1124; IR-ITERATIVE: 16: 1125; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1126; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1127; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1128; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1129; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1130; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1131; IR-ITERATIVE-NEXT: br label [[TMP23]] 1132; IR-ITERATIVE: 23: 1133; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1134; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1135; 1136; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( 1137; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1138; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1139; IR-DPP: 2: 1140; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1141; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1142; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1143; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1144; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1145; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1146; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1147; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1148; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1149; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1150; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1151; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1152; IR-DPP: 14: 1153; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8 1154; IR-DPP-NEXT: br label [[TMP16]] 1155; IR-DPP: 16: 1156; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1157; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1158; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1159; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1160; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1161; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1162; IR-DPP-NEXT: br label [[TMP23]] 1163; IR-DPP: 23: 1164; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1165; IR-DPP-NEXT: ret double [[TMP24]] 1166; 1167 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1168 ret double %result 1169} 1170 1171define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 { 1172; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( 1173; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1174; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1175; IR-ITERATIVE: 2: 1176; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1177; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1178; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1179; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1180; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1181; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1182; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1183; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1184; IR-ITERATIVE: 10: 1185; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("one-as") monotonic, align 8 1186; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1187; IR-ITERATIVE: 12: 1188; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1189; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1190; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1191; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1192; IR-ITERATIVE-NEXT: br label [[TMP17]] 1193; IR-ITERATIVE: 17: 1194; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1195; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1196; IR-ITERATIVE: ComputeLoop: 1197; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1198; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1199; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1200; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1201; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1202; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1203; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1204; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1205; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1206; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1207; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1208; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1209; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1210; IR-ITERATIVE: ComputeEnd: 1211; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1212; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1213; 1214; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( 1215; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1216; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1217; IR-DPP: 2: 1218; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1219; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1220; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1221; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1222; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1223; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1224; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1225; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1226; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1227; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1228; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1229; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1230; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1231; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1232; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1233; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1234; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1235; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1236; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1237; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1238; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1239; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1240; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1241; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1242; IR-DPP: 26: 1243; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("one-as") monotonic, align 8 1244; IR-DPP-NEXT: br label [[TMP28]] 1245; IR-DPP: 28: 1246; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1247; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1248; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1249; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1250; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1251; IR-DPP-NEXT: br label [[TMP34]] 1252; IR-DPP: 34: 1253; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1254; IR-DPP-NEXT: ret double [[TMP35]] 1255; 1256 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1257 ret double %result 1258} 1259 1260define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { 1261; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( 1262; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1263; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1264; IR-ITERATIVE: 2: 1265; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1266; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1267; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1268; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1269; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1270; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1271; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1272; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1273; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1274; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1275; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1276; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1277; IR-ITERATIVE: 14: 1278; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8 1279; IR-ITERATIVE-NEXT: br label [[TMP16]] 1280; IR-ITERATIVE: 16: 1281; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1282; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1283; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1284; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1285; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1286; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1287; IR-ITERATIVE-NEXT: br label [[TMP23]] 1288; IR-ITERATIVE: 23: 1289; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1290; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1291; 1292; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( 1293; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1294; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1295; IR-DPP: 2: 1296; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1297; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1298; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1299; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1300; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1301; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1302; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1303; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1304; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1305; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1306; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1307; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1308; IR-DPP: 14: 1309; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8 1310; IR-DPP-NEXT: br label [[TMP16]] 1311; IR-DPP: 16: 1312; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1313; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1314; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1315; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1316; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1317; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1318; IR-DPP-NEXT: br label [[TMP23]] 1319; IR-DPP: 23: 1320; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1321; IR-DPP-NEXT: ret double [[TMP24]] 1322; 1323 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1324 ret double %result 1325} 1326 1327define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { 1328; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( 1329; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1330; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1331; IR-ITERATIVE: 2: 1332; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1333; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1334; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1335; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1336; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1337; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1338; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1339; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1340; IR-ITERATIVE: 10: 1341; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1342; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1343; IR-ITERATIVE: 12: 1344; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1345; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1346; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1347; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1348; IR-ITERATIVE-NEXT: br label [[TMP17]] 1349; IR-ITERATIVE: 17: 1350; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1351; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1352; IR-ITERATIVE: ComputeLoop: 1353; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1354; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1355; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1356; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1357; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1358; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1359; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1360; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1361; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1362; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1363; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1364; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1365; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1366; IR-ITERATIVE: ComputeEnd: 1367; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1368; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1369; 1370; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( 1371; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1372; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1373; IR-DPP: 2: 1374; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1375; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1376; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1377; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1378; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1379; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1380; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1381; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1382; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1383; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1384; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1385; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1386; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1387; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1388; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1389; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1390; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1391; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1392; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1393; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1394; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1395; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1396; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1397; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1398; IR-DPP: 26: 1399; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1400; IR-DPP-NEXT: br label [[TMP28]] 1401; IR-DPP: 28: 1402; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1403; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1404; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1405; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1406; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1407; IR-DPP-NEXT: br label [[TMP34]] 1408; IR-DPP: 34: 1409; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1410; IR-DPP-NEXT: ret double [[TMP35]] 1411; 1412 %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1413 ret double %result 1414} 1415 1416define amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 { 1417; IR-LABEL: @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe( 1418; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1419; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1420; IR: 2: 1421; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1422; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1423; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1424; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1425; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1426; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1427; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1428; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1429; IR: 10: 1430; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1431; IR-NEXT: br label [[TMP12]] 1432; IR: 12: 1433; IR-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 1434; IR-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 1435; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to double 1436; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 1437; IR-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP16]]) 1438; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1439; IR-NEXT: br label [[TMP19]] 1440; IR: 19: 1441; IR-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1442; IR-NEXT: ret double [[TMP20]] 1443; 1444 %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1445 ret double %result 1446} 1447 1448define amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 { 1449; IR-ITERATIVE-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe( 1450; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1451; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1452; IR-ITERATIVE: 2: 1453; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1454; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1455; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1456; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1457; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1458; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1459; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1460; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1461; IR-ITERATIVE: 10: 1462; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1463; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1464; IR-ITERATIVE: 12: 1465; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1466; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) 1467; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP22:%.*]]) 1468; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1469; IR-ITERATIVE-NEXT: br label [[TMP17]] 1470; IR-ITERATIVE: 17: 1471; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1472; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1473; IR-ITERATIVE: ComputeLoop: 1474; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1475; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1476; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1477; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) 1478; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1479; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) 1480; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) 1481; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.minnum.f64(double [[ACCUMULATOR]], double [[TMP21]]) 1482; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1483; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1484; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1485; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1486; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1487; IR-ITERATIVE: ComputeEnd: 1488; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1489; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1490; 1491; IR-DPP-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe( 1492; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() 1493; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1494; IR-DPP: 2: 1495; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) 1496; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1497; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1498; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1499; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) 1500; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) 1501; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) 1502; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) 1503; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.minnum.f64(double [[TMP9]], double [[TMP10]]) 1504; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) 1505; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.minnum.f64(double [[TMP11]], double [[TMP12]]) 1506; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) 1507; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP13]], double [[TMP14]]) 1508; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) 1509; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP15]], double [[TMP16]]) 1510; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) 1511; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.minnum.f64(double [[TMP17]], double [[TMP18]]) 1512; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) 1513; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.minnum.f64(double [[TMP19]], double [[TMP20]]) 1514; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) 1515; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) 1516; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) 1517; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1518; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1519; IR-DPP: 26: 1520; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1521; IR-DPP-NEXT: br label [[TMP28]] 1522; IR-DPP: 28: 1523; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1524; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) 1525; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) 1526; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.minnum.f64(double [[TMP30]], double [[TMP31]]) 1527; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1528; IR-DPP-NEXT: br label [[TMP34]] 1529; IR-DPP: 34: 1530; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1531; IR-DPP-NEXT: ret double [[TMP35]] 1532; 1533 %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1534 ret double %result 1535} 1536 1537define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{ 1538; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( 1539; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1540; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1541; IR-ITERATIVE: 2: 1542; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1543; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1544; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1545; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1546; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1547; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1548; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1549; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1550; IR-ITERATIVE: 10: 1551; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1552; IR-ITERATIVE-NEXT: br label [[TMP12]] 1553; IR-ITERATIVE: 12: 1554; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 1555; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1556; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1557; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 1558; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] 1559; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1560; IR-ITERATIVE-NEXT: br label [[TMP19]] 1561; IR-ITERATIVE: 19: 1562; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1563; IR-ITERATIVE-NEXT: ret double [[TMP20]] 1564; 1565; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( 1566; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1567; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] 1568; IR-DPP: 2: 1569; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1570; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1571; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1572; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1573; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1574; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1575; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 1576; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] 1577; IR-DPP: 10: 1578; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1579; IR-DPP-NEXT: br label [[TMP12]] 1580; IR-DPP: 12: 1581; IR-DPP-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] 1582; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]] 1583; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1584; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] 1585; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 1586; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]] 1587; IR-DPP-NEXT: br label [[TMP19]] 1588; IR-DPP: 19: 1589; IR-DPP-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ] 1590; IR-DPP-NEXT: ret double [[TMP20]] 1591; 1592 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1593 ret double %result 1594} 1595 1596define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{ 1597; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( 1598; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1599; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1600; IR-ITERATIVE: 2: 1601; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1602; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1603; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1604; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1605; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1606; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1607; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1608; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1609; IR-ITERATIVE: 10: 1610; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 1611; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1612; IR-ITERATIVE: 12: 1613; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1614; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1615; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] 1616; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1617; IR-ITERATIVE-NEXT: br label [[TMP17]] 1618; IR-ITERATIVE: 17: 1619; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1620; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1621; IR-ITERATIVE: ComputeLoop: 1622; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1623; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1624; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1625; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1626; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1627; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1628; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1629; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] 1630; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1631; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1632; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1633; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1634; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1635; IR-ITERATIVE: ComputeEnd: 1636; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1637; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1638; 1639; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( 1640; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1641; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1642; IR-DPP: 2: 1643; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1644; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1645; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1646; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1647; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1648; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1649; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]] 1650; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1651; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] 1652; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1653; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] 1654; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1655; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] 1656; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1657; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] 1658; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1659; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] 1660; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1661; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] 1662; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1663; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1664; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1665; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1666; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1667; IR-DPP: 26: 1668; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8 1669; IR-DPP-NEXT: br label [[TMP28]] 1670; IR-DPP: 28: 1671; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1672; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1673; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1674; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP30]], double [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] 1675; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1676; IR-DPP-NEXT: br label [[TMP34]] 1677; IR-DPP: 34: 1678; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1679; IR-DPP-NEXT: ret double [[TMP35]] 1680; 1681 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1682 ret double %result 1683} 1684 1685define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { 1686; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( 1687; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1688; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1689; IR-ITERATIVE: 2: 1690; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1691; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1692; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1693; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1694; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1695; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1696; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] 1697; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1698; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1699; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1700; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1701; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1702; IR-ITERATIVE: 14: 1703; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4 1704; IR-ITERATIVE-NEXT: br label [[TMP16]] 1705; IR-ITERATIVE: 16: 1706; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1707; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] 1708; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1709; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1710; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1711; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1712; IR-ITERATIVE-NEXT: br label [[TMP23]] 1713; IR-ITERATIVE: 23: 1714; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1715; IR-ITERATIVE-NEXT: ret double [[TMP24]] 1716; 1717; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( 1718; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1719; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] 1720; IR-DPP: 2: 1721; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1722; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1723; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1724; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1725; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1726; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1727; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] 1728; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 1729; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1730; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1731; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 1732; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] 1733; IR-DPP: 14: 1734; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4 1735; IR-DPP-NEXT: br label [[TMP16]] 1736; IR-DPP: 16: 1737; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] 1738; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] 1739; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1740; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1741; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1742; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]] 1743; IR-DPP-NEXT: br label [[TMP23]] 1744; IR-DPP: 23: 1745; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ] 1746; IR-DPP-NEXT: ret double [[TMP24]] 1747; 1748 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1749 ret double %result 1750} 1751 1752define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { 1753; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( 1754; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] 1755; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] 1756; IR-ITERATIVE: 2: 1757; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1758; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1759; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1760; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1761; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] 1762; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] 1763; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] 1764; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] 1765; IR-ITERATIVE: 10: 1766; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] monotonic, align 4 1767; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] 1768; IR-ITERATIVE: 12: 1769; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] 1770; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] 1771; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1772; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] 1773; IR-ITERATIVE-NEXT: br label [[TMP17]] 1774; IR-ITERATIVE: 17: 1775; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ] 1776; IR-ITERATIVE-NEXT: ret double [[TMP18]] 1777; IR-ITERATIVE: ComputeLoop: 1778; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] 1779; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] 1780; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] 1781; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] 1782; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 1783; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] 1784; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] 1785; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] 1786; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] 1787; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 1788; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]] 1789; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0 1790; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]] 1791; IR-ITERATIVE: ComputeEnd: 1792; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0 1793; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] 1794; 1795; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( 1796; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] 1797; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] 1798; IR-DPP: 2: 1799; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] 1800; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 1801; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 1802; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 1803; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] 1804; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] 1805; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] 1806; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] 1807; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1808; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] 1809; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1810; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] 1811; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1812; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] 1813; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1814; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] 1815; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1816; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 1817; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1818; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] 1819; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] 1820; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] 1821; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 1822; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] 1823; IR-DPP: 26: 1824; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] monotonic, align 4 1825; IR-DPP-NEXT: br label [[TMP28]] 1826; IR-DPP: 28: 1827; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] 1828; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] 1829; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] 1830; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] 1831; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] 1832; IR-DPP-NEXT: br label [[TMP34]] 1833; IR-DPP: 34: 1834; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ] 1835; IR-DPP-NEXT: ret double [[TMP35]] 1836; 1837 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1838 ret double %result 1839} 1840 1841define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double inreg %val) #0 { 1842; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe( 1843; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4 1844; IR-NEXT: ret double [[RESULT]] 1845; 1846 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1847 ret double %result 1848} 1849 1850define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double %val) #0 { 1851; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe( 1852; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4 1853; IR-NEXT: ret double [[RESULT]] 1854; 1855 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4 1856 ret double %result 1857} 1858 1859define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1 { 1860; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp( 1861; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8 1862; IR-NEXT: ret double [[RESULT]] 1863; 1864 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1865 ret double %result 1866} 1867 1868define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1 { 1869; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp( 1870; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8 1871; IR-NEXT: ret double [[RESULT]] 1872; 1873 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic 1874 ret double %result 1875} 1876 1877define amdgpu_ps double @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 { 1878; IR-LABEL: @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp( 1879; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1880; IR-NEXT: ret double [[RESULT]] 1881; 1882 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1883 ret double %result 1884} 1885 1886define amdgpu_ps double @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 { 1887; IR-LABEL: @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp( 1888; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1889; IR-NEXT: ret double [[RESULT]] 1890; 1891 %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1892 ret double %result 1893} 1894 1895define amdgpu_ps double @global_atomic_fmin_double_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, double inreg %val) #0 { 1896; IR-LABEL: @global_atomic_fmin_double_div_address_uni_value_agent_scope( 1897; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1898; IR-NEXT: ret double [[RESULT]] 1899; 1900 %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1901 ret double %result 1902} 1903 1904define amdgpu_ps double @global_atomic_fmin_double_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, double %val) #0 { 1905; IR-LABEL: @global_atomic_fmin_double_div_address_div_value_agent_scope( 1906; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1907; IR-NEXT: ret double [[RESULT]] 1908; 1909 %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1910 ret double %result 1911} 1912 1913define amdgpu_ps double @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1{ 1914; IR-LABEL: @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp( 1915; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1916; IR-NEXT: ret double [[RESULT]] 1917; 1918 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1919 ret double %result 1920} 1921 1922define amdgpu_ps double @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1{ 1923; IR-LABEL: @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp( 1924; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 1925; IR-NEXT: ret double [[RESULT]] 1926; 1927 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic 1928 ret double %result 1929} 1930 1931define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 { 1932; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp( 1933; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4 1934; IR-NEXT: ret double [[RESULT]] 1935; 1936 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1937 ret double %result 1938} 1939 1940define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 { 1941; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp( 1942; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4 1943; IR-NEXT: ret double [[RESULT]] 1944; 1945 %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4 1946 ret double %result 1947} 1948 1949attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 1950attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 1951attributes #2 = { strictfp } 1952