; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR %s

; Volatile atomic add whose operand is loaded at an index derived from
; @llvm.amdgcn.workgroup.id.x. The checks below expect the value to be
; multiplied by the ctpop of the ballot mask and a single volatile atomicrmw to
; be issued on the path taken when the mbcnt result equals 0. The result of the
; atomic (%old) is unused. The first kernel parameter is unnamed; both pointers
; are read from the kernarg segment.
define amdgpu_kernel void @uniform_value(ptr addrspace(1) , ptr addrspace(1) %val) #0 {
; IR-LABEL: @uniform_value(
; IR-NEXT:  entry:
; IR-NEXT:    [[UNIFORM_VALUE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; IR-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[UNIFORM_VALUE_KERNARG_SEGMENT]], i64 36
; IR-NEXT:    [[LOADED_OUT_KERNARG_OFFSET:%.*]] = load <2 x i64>, ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4
; IR-NEXT:    [[OUT_LOAD1:%.*]] = extractelement <2 x i64> [[LOADED_OUT_KERNARG_OFFSET]], i32 0
; IR-NEXT:    [[MEM_LOCATION:%.*]] = inttoptr i64 [[OUT_LOAD1]] to ptr addrspace(1)
; IR-NEXT:    [[VAL_LOAD2:%.*]] = extractelement <2 x i64> [[LOADED_OUT_KERNARG_OFFSET]], i32 1
; IR-NEXT:    [[VALUE_ADDRESS:%.*]] = inttoptr i64 [[VAL_LOAD2]] to ptr addrspace(1)
; IR-NEXT:    [[LANE:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x()
; IR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[LANE]] to i64
; IR-NEXT:    [[ELE:%.*]] = getelementptr i32, ptr addrspace(1) [[VALUE_ADDRESS]], i64 [[IDXPROM]]
; IR-NEXT:    [[VALUE:%.*]] = load i32, ptr addrspace(1) [[ELE]], align 4
; IR-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[MEM_LOCATION]], i32 4
; IR-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-NEXT:    [[TMP9:%.*]] = mul i32 [[VALUE]], [[TMP8]]
; IR-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP13:%.*]]
; IR:       11:
; IR-NEXT:    [[TMP12:%.*]] = atomicrmw volatile add ptr addrspace(1) [[GEP]], i32 [[TMP9]] seq_cst, align 4
; IR-NEXT:    br label [[TMP13]]
; IR:       13:
; IR-NEXT:    ret void
;
entry:
  %uniform_value.kernarg.segment = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %out.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %uniform_value.kernarg.segment, i64 36
  %loaded.out.kernarg.offset = load <2 x i64>, ptr addrspace(4) %out.kernarg.offset, align 4
  %out.load1 = extractelement <2 x i64> %loaded.out.kernarg.offset, i32 0
  %mem.location = inttoptr i64 %out.load1 to ptr addrspace(1)
  %val.load2 = extractelement <2 x i64> %loaded.out.kernarg.offset, i32 1
  %value.address = inttoptr i64 %val.load2 to ptr addrspace(1)
  %lane = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %idxprom = sext i32 %lane to i64
  %ele = getelementptr i32, ptr addrspace(1) %value.address, i64 %idxprom
  %value = load i32, ptr addrspace(1) %ele, align 4
  %gep = getelementptr i32, ptr addrspace(1) %mem.location, i32 4
  %old = atomicrmw volatile add ptr addrspace(1) %gep, i32 %value seq_cst, align 4
  ret void
}

; Same kernel shape as @uniform_value, but the operand is loaded at an index
; derived from @llvm.amdgcn.workitem.id.x. The checks below expect a
; ComputeLoop block that picks a bit of the ballot mask with cttz, reads that
; lane's value with readlane, adds it to an accumulator and clears the bit,
; looping until the mask is zero. ComputeEnd then branches on the mbcnt result
; being 0 to the block that performs the volatile atomicrmw with the
; accumulated sum. The result of the atomic (%old) is unused.
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, ptr addrspace(1) %val) #0 {
; IR-LABEL: @divergent_value(
; IR-NEXT:  entry:
; IR-NEXT:    [[DIVERGENT_VALUE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; IR-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DIVERGENT_VALUE_KERNARG_SEGMENT]], i64 36
; IR-NEXT:    [[LOADED_OUT_KERNARG_OFFSET:%.*]] = load <2 x i64>, ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4
; IR-NEXT:    [[OUT_LOAD1:%.*]] = extractelement <2 x i64> [[LOADED_OUT_KERNARG_OFFSET]], i32 0
; IR-NEXT:    [[MEM_LOCATION:%.*]] = inttoptr i64 [[OUT_LOAD1]] to ptr addrspace(1)
; IR-NEXT:    [[VAL_LOAD2:%.*]] = extractelement <2 x i64> [[LOADED_OUT_KERNARG_OFFSET]], i32 1
; IR-NEXT:    [[VALUE_ADDRESS:%.*]] = inttoptr i64 [[VAL_LOAD2]] to ptr addrspace(1)
; IR-NEXT:    [[LANE:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; IR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[LANE]] to i64
; IR-NEXT:    [[ELE:%.*]] = getelementptr i32, ptr addrspace(1) [[VALUE_ADDRESS]], i64 [[IDXPROM]]
; IR-NEXT:    [[VALUE:%.*]] = load i32, ptr addrspace(1) [[ELE]], align 4
; IR-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[MEM_LOCATION]], i32 4
; IR-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; IR-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
; IR-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
; IR-NEXT:    [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
; IR-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
; IR-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT:    br label [[COMPUTELOOP:%.*]]
; IR:       7:
; IR-NEXT:    [[TMP8:%.*]] = atomicrmw volatile add ptr addrspace(1) [[GEP]], i32 [[TMP13:%.*]] seq_cst, align 4
; IR-NEXT:    br label [[TMP9:%.*]]
; IR:       9:
; IR-NEXT:    ret void
; IR:       ComputeLoop:
; IR-NEXT:    [[ACCUMULATOR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13]], [[COMPUTELOOP]] ]
; IR-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP6]], [[ENTRY]] ], [ [[TMP16:%.*]], [[COMPUTELOOP]] ]
; IR-NEXT:    [[TMP10:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
; IR-NEXT:    [[TMP12:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[VALUE]], i32 [[TMP11]])
; IR-NEXT:    [[TMP13]] = add i32 [[ACCUMULATOR]], [[TMP12]]
; IR-NEXT:    [[TMP14:%.*]] = shl i64 1, [[TMP10]]
; IR-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], -1
; IR-NEXT:    [[TMP16]] = and i64 [[ACTIVEBITS]], [[TMP15]]
; IR-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[TMP16]], 0
; IR-NEXT:    br i1 [[TMP17]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR:       ComputeEnd:
; IR-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[TMP5]], 0
; IR-NEXT:    br i1 [[TMP18]], label [[TMP7:%.*]], label [[TMP9]]
;
entry:
  %divergent_value.kernarg.segment = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %out.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %divergent_value.kernarg.segment, i64 36
  %loaded.out.kernarg.offset = load <2 x i64>, ptr addrspace(4) %out.kernarg.offset, align 4
  %out.load1 = extractelement <2 x i64> %loaded.out.kernarg.offset, i32 0
  %mem.location = inttoptr i64 %out.load1 to ptr addrspace(1)
  %val.load2 = extractelement <2 x i64> %loaded.out.kernarg.offset, i32 1
  %value.address = inttoptr i64 %val.load2 to ptr addrspace(1)
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %lane to i64
  %ele = getelementptr i32, ptr addrspace(1) %value.address, i64 %idxprom
  %value = load i32, ptr addrspace(1) %ele, align 4
  %gep = getelementptr i32, ptr addrspace(1) %mem.location, i32 4
  %old = atomicrmw volatile add ptr addrspace(1) %gep, i32 %value seq_cst, align 4
  ret void
}

; Intrinsics used by the kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.x() #1

declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()

; #0 is applied to both kernels; #1 is applied to the two id intrinsics.
attributes #0 = {"target-cpu"="gfx906"}
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none)}