; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll (revision 2a9607168b42498c4a70441089be57af88eec4d2)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN:  opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
; RUN:  opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
; Workitem-id intrinsic; the *_div_value kernels in this file bitcast its
; result to float to obtain an operand that differs between lanes.
declare i32 @llvm.amdgcn.workitem.id.x()
; Uniform fadd: every lane adds the same constant (4.0).
; Both strategies are expected to emit the same IR: take a ballot of the
; active lanes, count them with ctpop, multiply the constant by that count
; (uitofp + fmul), and let only the lane whose mbcnt result is 0 issue one
; atomicrmw fadd with the scaled value.
define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE:       12:
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP14]]
; IR-ITERATIVE:       14:
; IR-ITERATIVE-NEXT:    ret void
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_value(
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP:       12:
; IR-DPP-NEXT:    [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP14]]
; IR-DPP:       14:
; IR-DPP-NEXT:    ret void
;
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
  ret void
}


; Divergent fadd: the operand is the workitem id reinterpreted as a float, so
; it differs between lanes.
; Iterative strategy: ComputeLoop takes the lowest set bit of the ballot mask
; (cttz), fetches that lane's value with readlane, fadd-accumulates it starting
; from -0.0, clears the bit, and repeats until the mask is empty.
; DPP strategy: inactive lanes are set to -0.0, six update.dpp + fadd steps
; combine the values, and the value of lane 63 is read back and wrapped in
; strict.wwm.
; In both cases only the lane whose mbcnt result is 0 performs the single
; atomicrmw fadd with the combined value.
define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value(
; IR-ITERATIVE-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE:       8:
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP10:%.*]]
; IR-ITERATIVE:       10:
; IR-ITERATIVE-NEXT:    ret void
; IR-ITERATIVE:       ComputeLoop:
; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT:    [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT:    [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE:       ComputeEnd:
; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fadd_div_value(
; IR-DPP-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP:       23:
; IR-DPP-NEXT:    [[TMP24:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP25]]
; IR-DPP:       25:
; IR-DPP-NEXT:    ret void
;
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %divValue = bitcast i32 %id.x to float
  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue seq_cst
  ret void
}

; Uniform fsub: every lane subtracts the same constant (4.0).
; The input must use atomicrmw fsub so that this kernel covers the uniform
; fsub path rather than repeating the uniform fadd test. The operand is still
; expected to be scaled by the active-lane count (ctpop + uitofp + fmul), and
; the single atomic issued by the lane whose mbcnt result is 0 must remain an
; fsub.
; NOTE(review): the two atomicrmw check lines below were adjusted by hand to
; match the fsub input; regenerate with utils/update_test_checks.py to confirm.
define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value(
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE:       12:
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP14]]
; IR-ITERATIVE:       14:
; IR-ITERATIVE-NEXT:    ret void
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_value(
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT:    [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP:       12:
; IR-DPP-NEXT:    [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP14]]
; IR-DPP:       14:
; IR-DPP-NEXT:    ret void
;
  %result = atomicrmw fsub ptr addrspace(1) %ptr, float 4.0 seq_cst
  ret void
}


; Divergent fsub: the operand is the workitem id reinterpreted as a float, so
; it differs between lanes.
; The per-lane values are combined with fadd (starting from / padded with
; -0.0) exactly as in the divergent fadd kernel: a cttz + readlane loop for the
; iterative strategy, and set.inactive + six update.dpp steps + readlane of
; lane 63 + strict.wwm for the DPP strategy.
; Only the final atomic differs: the lane whose mbcnt result is 0 issues a
; single atomicrmw fsub of the combined value.
define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value(
; IR-ITERATIVE-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE:       8:
; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT:    br label [[TMP10:%.*]]
; IR-ITERATIVE:       10:
; IR-ITERATIVE-NEXT:    ret void
; IR-ITERATIVE:       ComputeLoop:
; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT:    [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT:    [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE:       ComputeEnd:
; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT:    br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fsub_div_value(
; IR-DPP-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT:    [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP:       23:
; IR-DPP-NEXT:    [[TMP24:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT:    br label [[TMP25]]
; IR-DPP:       25:
; IR-DPP-NEXT:    ret void
;
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %divValue = bitcast i32 %id.x to float
  %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst
  ret void
}

; Attribute group #0, attached to every kernel in this file: selects gfx906 as
; the target CPU.
attributes #0 = {"target-cpu"="gfx906"}