xref: /llvm-project/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll (revision dfda9c5b9ef24b489f4fff30a6ebc0b57b4e7961)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN:  opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s
3; RUN:  opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s
4
5; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations.
6; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
7; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
8
9define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
10; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
11; IR-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
12; IR-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
13; IR:       2:
14; IR-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
15; IR-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
16; IR-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
17; IR-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
18; IR-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
19; IR-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
20; IR-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
21; IR-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
22; IR-NEXT:    [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
23; IR-NEXT:    [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
24; IR-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
25; IR-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
26; IR:       14:
27; IR-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
28; IR-NEXT:    br label [[TMP16]]
29; IR:       16:
30; IR-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
31; IR-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]])
32; IR-NEXT:    [[TMP19:%.*]] = uitofp i32 [[TMP8]] to float
33; IR-NEXT:    [[TMP20:%.*]] = fmul float [[VAL]], [[TMP19]]
34; IR-NEXT:    [[TMP21:%.*]] = fadd float [[TMP18]], [[TMP20]]
35; IR-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
36; IR-NEXT:    br label [[TMP23]]
37; IR:       23:
38; IR-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
39; IR-NEXT:    ret float [[TMP24]]
40;
41  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
42  ret float %result
43}
44
45define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
46; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
47; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
48; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
49; IR-ITERATIVE:       2:
50; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
51; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
52; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
53; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
54; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
55; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
56; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
57; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
58; IR-ITERATIVE:       10:
59; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
60; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
61; IR-ITERATIVE:       12:
62; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
63; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
64; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP22:%.*]]
65; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
66; IR-ITERATIVE-NEXT:    br label [[TMP17]]
67; IR-ITERATIVE:       17:
68; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
69; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
70; IR-ITERATIVE:       ComputeLoop:
71; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
72; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
73; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
74; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
75; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
76; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]])
77; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]])
78; IR-ITERATIVE-NEXT:    [[TMP23]] = fadd float [[ACCUMULATOR]], [[TMP21]]
79; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
80; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
81; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
82; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
83; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
84; IR-ITERATIVE:       ComputeEnd:
85; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
86; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
87;
88; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
89; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
90; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
91; IR-DPP:       2:
92; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
93; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
94; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
95; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
96; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
97; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
98; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00)
99; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
100; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
101; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false)
102; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
103; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false)
104; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
105; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false)
106; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
107; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false)
108; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
109; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false)
110; IR-DPP-NEXT:    [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]]
111; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false)
112; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63)
113; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]])
114; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
115; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
116; IR-DPP:       26:
117; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
118; IR-DPP-NEXT:    br label [[TMP28]]
119; IR-DPP:       28:
120; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
121; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]])
122; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]])
123; IR-DPP-NEXT:    [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]]
124; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
125; IR-DPP-NEXT:    br label [[TMP34]]
126; IR-DPP:       34:
127; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
128; IR-DPP-NEXT:    ret float [[TMP35]]
129;
130  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
131  ret float %result
132}
133
134define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
135; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(
136; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
137; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
138; IR-ITERATIVE:       2:
139; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
140; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
141; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
142; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
143; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
144; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
145; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
146; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
147; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
148; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
149; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
150; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
151; IR-ITERATIVE:       14:
152; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
153; IR-ITERATIVE-NEXT:    br label [[TMP16]]
154; IR-ITERATIVE:       16:
155; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
156; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
157; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
158; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
159; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
160; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
161; IR-ITERATIVE-NEXT:    br label [[TMP23]]
162; IR-ITERATIVE:       23:
163; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
164; IR-ITERATIVE-NEXT:    ret float [[TMP24]]
165;
166; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(
167; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]]
168; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
169; IR-DPP:       2:
170; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
171; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
172; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
173; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
174; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
175; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
176; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
177; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
178; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
179; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
180; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
181; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
182; IR-DPP:       14:
183; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
184; IR-DPP-NEXT:    br label [[TMP16]]
185; IR-DPP:       16:
186; IR-DPP-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
187; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
188; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
189; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
190; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
191; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
192; IR-DPP-NEXT:    br label [[TMP23]]
193; IR-DPP:       23:
194; IR-DPP-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
195; IR-DPP-NEXT:    ret float [[TMP24]]
196;
197  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
198  ret float %result
199}
200
201define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
202; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(
203; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
204; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
205; IR-ITERATIVE:       2:
206; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
207; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
208; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
209; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
210; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
211; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
212; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
213; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
214; IR-ITERATIVE:       10:
215; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("one-as") monotonic, align 4
216; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
217; IR-ITERATIVE:       12:
218; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
219; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
220; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
221; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
222; IR-ITERATIVE-NEXT:    br label [[TMP17]]
223; IR-ITERATIVE:       17:
224; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
225; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
226; IR-ITERATIVE:       ComputeLoop:
227; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
228; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
229; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
230; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
231; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
232; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
233; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
234; IR-ITERATIVE-NEXT:    [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
235; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
236; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
237; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
238; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
239; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
240; IR-ITERATIVE:       ComputeEnd:
241; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
242; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
243;
244; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(
245; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
246; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
247; IR-DPP:       2:
248; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
249; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
250; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
251; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
252; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
253; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
254; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
255; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
256; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
257; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
258; IR-DPP-NEXT:    [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
259; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
260; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
261; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
262; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
263; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
264; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
265; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
266; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
267; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
268; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
269; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
270; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
271; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
272; IR-DPP:       26:
273; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4
274; IR-DPP-NEXT:    br label [[TMP28]]
275; IR-DPP:       28:
276; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
277; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
278; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
279; IR-DPP-NEXT:    [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
280; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
281; IR-DPP-NEXT:    br label [[TMP34]]
282; IR-DPP:       34:
283; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
284; IR-DPP-NEXT:    ret float [[TMP35]]
285;
286  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
287  ret float %result
288}
289
290define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
291; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
292; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
293; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
294; IR-ITERATIVE:       2:
295; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
296; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
297; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
298; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
299; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
300; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
301; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
302; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
303; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
304; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
305; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
306; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
307; IR-ITERATIVE:       14:
308; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
309; IR-ITERATIVE-NEXT:    br label [[TMP16]]
310; IR-ITERATIVE:       16:
311; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
312; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
313; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
314; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
315; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
316; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
317; IR-ITERATIVE-NEXT:    br label [[TMP23]]
318; IR-ITERATIVE:       23:
319; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
320; IR-ITERATIVE-NEXT:    ret float [[TMP24]]
321;
322; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
323; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
324; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
325; IR-DPP:       2:
326; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
327; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
328; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
329; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
330; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
331; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
332; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
333; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
334; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
335; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
336; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
337; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
338; IR-DPP:       14:
339; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
340; IR-DPP-NEXT:    br label [[TMP16]]
341; IR-DPP:       16:
342; IR-DPP-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
343; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
344; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
345; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
346; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
347; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
348; IR-DPP-NEXT:    br label [[TMP23]]
349; IR-DPP:       23:
350; IR-DPP-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
351; IR-DPP-NEXT:    ret float [[TMP24]]
352;
353  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
354  ret float %result
355}
356
357define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
358; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
359; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
360; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
361; IR-ITERATIVE:       2:
362; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
363; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
364; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
365; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
366; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
367; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
368; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
369; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
370; IR-ITERATIVE:       10:
371; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
372; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
373; IR-ITERATIVE:       12:
374; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
375; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
376; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
377; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
378; IR-ITERATIVE-NEXT:    br label [[TMP17]]
379; IR-ITERATIVE:       17:
380; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
381; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
382; IR-ITERATIVE:       ComputeLoop:
383; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
384; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
385; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
386; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
387; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
388; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
389; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
390; IR-ITERATIVE-NEXT:    [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
391; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
392; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
393; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
394; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
395; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
396; IR-ITERATIVE:       ComputeEnd:
397; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
398; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
399;
400; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
401; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
402; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
403; IR-DPP:       2:
404; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
405; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
406; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
407; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
408; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
409; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
410; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
411; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
412; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
413; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
414; IR-DPP-NEXT:    [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
415; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
416; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
417; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
418; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
419; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
420; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
421; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
422; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
423; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
424; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
425; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
426; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
427; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
428; IR-DPP:       26:
429; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
430; IR-DPP-NEXT:    br label [[TMP28]]
431; IR-DPP:       28:
432; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
433; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
434; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
435; IR-DPP-NEXT:    [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
436; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
437; IR-DPP-NEXT:    br label [[TMP34]]
438; IR-DPP:       34:
439; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
440; IR-DPP-NEXT:    ret float [[TMP35]]
441;
442  %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
443  ret float %result
444}
445
446define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
447; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
448; IR-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
449; IR-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
450; IR:       2:
451; IR-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
452; IR-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
453; IR-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
454; IR-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
455; IR-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
456; IR-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
457; IR-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
458; IR-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
459; IR:       10:
460; IR-NEXT:    [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
461; IR-NEXT:    br label [[TMP12]]
462; IR:       12:
463; IR-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
464; IR-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
465; IR-NEXT:    [[TMP15:%.*]] = uitofp i32 [[TMP8]] to float
466; IR-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
467; IR-NEXT:    [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP16]])
468; IR-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
469; IR-NEXT:    br label [[TMP19]]
470; IR:       19:
471; IR-NEXT:    [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
472; IR-NEXT:    ret float [[TMP20]]
473;
474  %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
475  ret float %result
476}
477
478define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
479; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
480; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
481; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
482; IR-ITERATIVE:       2:
483; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
484; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
485; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
486; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
487; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
488; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
489; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
490; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
491; IR-ITERATIVE:       10:
492; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
493; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
494; IR-ITERATIVE:       12:
495; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
496; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
497; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP22:%.*]])
498; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
499; IR-ITERATIVE-NEXT:    br label [[TMP17]]
500; IR-ITERATIVE:       17:
501; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
502; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
503; IR-ITERATIVE:       ComputeLoop:
504; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
505; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
506; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
507; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
508; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
509; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]])
510; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]])
511; IR-ITERATIVE-NEXT:    [[TMP23]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP21]])
512; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
513; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
514; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
515; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
516; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
517; IR-ITERATIVE:       ComputeEnd:
518; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
519; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
520;
521; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
522; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
523; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
524; IR-DPP:       2:
525; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
526; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
527; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
528; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
529; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
530; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
531; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000)
532; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
533; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]])
534; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false)
535; IR-DPP-NEXT:    [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]])
536; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false)
537; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]])
538; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false)
539; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]])
540; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false)
541; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]])
542; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false)
543; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]])
544; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false)
545; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63)
546; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]])
547; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
548; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
549; IR-DPP:       26:
550; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
551; IR-DPP-NEXT:    br label [[TMP28]]
552; IR-DPP:       28:
553; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
554; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]])
555; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]])
556; IR-DPP-NEXT:    [[TMP32:%.*]] = call float @llvm.minnum.f32(float [[TMP30]], float [[TMP31]])
557; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
558; IR-DPP-NEXT:    br label [[TMP34]]
559; IR-DPP:       34:
560; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
561; IR-DPP-NEXT:    ret float [[TMP35]]
562;
563  %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
564  ret float %result
565}
566
567define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
568; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(
569; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
570; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
571; IR-ITERATIVE:       2:
572; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
573; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
574; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
575; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
576; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
577; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
578; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
579; IR-ITERATIVE-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
580; IR-ITERATIVE:       10:
581; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
582; IR-ITERATIVE-NEXT:    br label [[TMP12]]
583; IR-ITERATIVE:       12:
584; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
585; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
586; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
587; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
588; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]]
589; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
590; IR-ITERATIVE-NEXT:    br label [[TMP19]]
591; IR-ITERATIVE:       19:
592; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
593; IR-ITERATIVE-NEXT:    ret float [[TMP20]]
594;
595; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(
596; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
597; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
598; IR-DPP:       2:
599; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
600; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
601; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
602; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
603; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
604; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
605; IR-DPP-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
606; IR-DPP-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
607; IR-DPP:       10:
608; IR-DPP-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
609; IR-DPP-NEXT:    br label [[TMP12]]
610; IR-DPP:       12:
611; IR-DPP-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
612; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]]
613; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
614; IR-DPP-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
615; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
616; IR-DPP-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
617; IR-DPP-NEXT:    br label [[TMP19]]
618; IR-DPP:       19:
619; IR-DPP-NEXT:    [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
620; IR-DPP-NEXT:    ret float [[TMP20]]
621;
622  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
623  ret float %result
624}
625
626define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{
627; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(
628; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
629; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
630; IR-ITERATIVE:       2:
631; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
632; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
633; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
634; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
635; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
636; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
637; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
638; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
639; IR-ITERATIVE:       10:
640; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
641; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
642; IR-ITERATIVE:       12:
643; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
644; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
645; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]]
646; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
647; IR-ITERATIVE-NEXT:    br label [[TMP17]]
648; IR-ITERATIVE:       17:
649; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
650; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
651; IR-ITERATIVE:       ComputeLoop:
652; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
653; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
654; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
655; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
656; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
657; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
658; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
659; IR-ITERATIVE-NEXT:    [[TMP23]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]]
660; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
661; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
662; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
663; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
664; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
665; IR-ITERATIVE:       ComputeEnd:
666; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
667; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
668;
669; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(
670; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
671; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
672; IR-DPP:       2:
673; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
674; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
675; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
676; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
677; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
678; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
679; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]]
680; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
681; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]]
682; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
683; IR-DPP-NEXT:    [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]]
684; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
685; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]]
686; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
687; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
688; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
689; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]]
690; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
691; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]]
692; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
693; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
694; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
695; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
696; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
697; IR-DPP:       26:
698; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
699; IR-DPP-NEXT:    br label [[TMP28]]
700; IR-DPP:       28:
701; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
702; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
703; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
704; IR-DPP-NEXT:    [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]]
705; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
706; IR-DPP-NEXT:    br label [[TMP34]]
707; IR-DPP:       34:
708; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
709; IR-DPP-NEXT:    ret float [[TMP35]]
710;
711  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
712  ret float %result
713}
714
715define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
716; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
717; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
718; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
719; IR-ITERATIVE:       2:
720; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
721; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
722; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
723; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
724; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
725; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
726; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
727; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
728; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
729; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
730; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
731; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
732; IR-ITERATIVE:       14:
733; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
734; IR-ITERATIVE-NEXT:    br label [[TMP16]]
735; IR-ITERATIVE:       16:
736; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
737; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
738; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
739; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
740; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
741; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
742; IR-ITERATIVE-NEXT:    br label [[TMP23]]
743; IR-ITERATIVE:       23:
744; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
745; IR-ITERATIVE-NEXT:    ret float [[TMP24]]
746;
747; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
748; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
749; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
750; IR-DPP:       2:
751; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
752; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
753; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
754; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
755; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
756; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
757; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
758; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
759; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
760; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
761; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
762; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
763; IR-DPP:       14:
764; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
765; IR-DPP-NEXT:    br label [[TMP16]]
766; IR-DPP:       16:
767; IR-DPP-NEXT:    [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
768; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
769; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
770; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
771; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
772; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
773; IR-DPP-NEXT:    br label [[TMP23]]
774; IR-DPP:       23:
775; IR-DPP-NEXT:    [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
776; IR-DPP-NEXT:    ret float [[TMP24]]
777;
778  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
779  ret float %result
780}
781
782define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
783; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
784; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
785; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
786; IR-ITERATIVE:       2:
787; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
788; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
789; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
790; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
791; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
792; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
793; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
794; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
795; IR-ITERATIVE:       10:
796; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] monotonic, align 4
797; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
798; IR-ITERATIVE:       12:
799; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
800; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
801; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
802; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
803; IR-ITERATIVE-NEXT:    br label [[TMP17]]
804; IR-ITERATIVE:       17:
805; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
806; IR-ITERATIVE-NEXT:    ret float [[TMP18]]
807; IR-ITERATIVE:       ComputeLoop:
808; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
809; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
810; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
811; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
812; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
813; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
814; IR-ITERATIVE-NEXT:    [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
815; IR-ITERATIVE-NEXT:    [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
816; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
817; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
818; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
819; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
820; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
821; IR-ITERATIVE:       ComputeEnd:
822; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
823; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
824;
825; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
826; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
827; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
828; IR-DPP:       2:
829; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
830; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
831; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
832; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
833; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
834; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
835; IR-DPP-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
836; IR-DPP-NEXT:    [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
837; IR-DPP-NEXT:    [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
838; IR-DPP-NEXT:    [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
839; IR-DPP-NEXT:    [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
840; IR-DPP-NEXT:    [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
841; IR-DPP-NEXT:    [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
842; IR-DPP-NEXT:    [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
843; IR-DPP-NEXT:    [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
844; IR-DPP-NEXT:    [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
845; IR-DPP-NEXT:    [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
846; IR-DPP-NEXT:    [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
847; IR-DPP-NEXT:    [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
848; IR-DPP-NEXT:    [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
849; IR-DPP-NEXT:    [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
850; IR-DPP-NEXT:    [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
851; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
852; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
853; IR-DPP:       26:
854; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4
855; IR-DPP-NEXT:    br label [[TMP28]]
856; IR-DPP:       28:
857; IR-DPP-NEXT:    [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
858; IR-DPP-NEXT:    [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
859; IR-DPP-NEXT:    [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
860; IR-DPP-NEXT:    [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
861; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
862; IR-DPP-NEXT:    br label [[TMP34]]
863; IR-DPP:       34:
864; IR-DPP-NEXT:    [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
865; IR-DPP-NEXT:    ret float [[TMP35]]
866;
867  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
868  ret float %result
869}
870
871define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
872; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
873; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
874; IR-NEXT:    ret float [[RESULT]]
875;
876  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
877  ret float %result
878}
879
880define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
881; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
882; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
883; IR-NEXT:    ret float [[RESULT]]
884;
885  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
886  ret float %result
887}
888
889define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
890; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(
891; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
892; IR-NEXT:    ret float [[RESULT]]
893;
894  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
895  ret float %result
896}
897
898define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1 {
899; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(
900; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
901; IR-NEXT:    ret float [[RESULT]]
902;
903  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
904  ret float %result
905}
906
907define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
908; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
909; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
910; IR-NEXT:    ret float [[RESULT]]
911;
912  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
913  ret float %result
914}
915
916define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
917; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
918; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
919; IR-NEXT:    ret float [[RESULT]]
920;
921  %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
922  ret float %result
923}
924
925define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
926; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
927; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
928; IR-NEXT:    ret float [[RESULT]]
929;
930  %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
931  ret float %result
932}
933
934define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
935; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
936; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
937; IR-NEXT:    ret float [[RESULT]]
938;
939  %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
940  ret float %result
941}
942
943define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1{
944; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(
945; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
946; IR-NEXT:    ret float [[RESULT]]
947;
948  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
949  ret float %result
950}
951
952define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1{
953; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(
954; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
955; IR-NEXT:    ret float [[RESULT]]
956;
957  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
958  ret float %result
959}
960
961define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
962; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
963; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
964; IR-NEXT:    ret float [[RESULT]]
965;
966  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
967  ret float %result
968}
969
970define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
971; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
972; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
973; IR-NEXT:    ret float [[RESULT]]
974;
975  %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
976  ret float %result
977}
978
979define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 {
980; IR-LABEL: @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(
981; IR-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
982; IR-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
983; IR:       2:
984; IR-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
985; IR-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
986; IR-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
987; IR-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
988; IR-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
989; IR-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
990; IR-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
991; IR-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
992; IR-NEXT:    [[TMP11:%.*]] = uitofp i32 [[TMP10]] to double
993; IR-NEXT:    [[TMP12:%.*]] = fmul double [[VAL:%.*]], [[TMP11]]
994; IR-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
995; IR-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
996; IR:       14:
997; IR-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 4
998; IR-NEXT:    br label [[TMP16]]
999; IR:       16:
1000; IR-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1001; IR-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]])
1002; IR-NEXT:    [[TMP19:%.*]] = uitofp i32 [[TMP8]] to double
1003; IR-NEXT:    [[TMP20:%.*]] = fmul double [[VAL]], [[TMP19]]
1004; IR-NEXT:    [[TMP21:%.*]] = fadd double [[TMP18]], [[TMP20]]
1005; IR-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1006; IR-NEXT:    br label [[TMP23]]
1007; IR:       23:
1008; IR-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1009; IR-NEXT:    ret double [[TMP24]]
1010;
1011  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
1012  ret double %result
1013}
1014
1015define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 {
1016; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(
1017; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
1018; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1019; IR-ITERATIVE:       2:
1020; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1021; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1022; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1023; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1024; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
1025; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
1026; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1027; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1028; IR-ITERATIVE:       10:
1029; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 4
1030; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1031; IR-ITERATIVE:       12:
1032; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1033; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
1034; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = fadd double [[TMP14]], [[TMP22:%.*]]
1035; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1036; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1037; IR-ITERATIVE:       17:
1038; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1039; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1040; IR-ITERATIVE:       ComputeLoop:
1041; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1042; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1043; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1044; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
1045; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1046; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]])
1047; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]])
1048; IR-ITERATIVE-NEXT:    [[TMP23]] = fadd double [[ACCUMULATOR]], [[TMP21]]
1049; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1050; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1051; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1052; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1053; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1054; IR-ITERATIVE:       ComputeEnd:
1055; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1056; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1057;
1058; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(
1059; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
1060; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1061; IR-DPP:       2:
1062; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1063; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1064; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1065; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1066; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
1067; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
1068; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00)
1069; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false)
1070; IR-DPP-NEXT:    [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
1071; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false)
1072; IR-DPP-NEXT:    [[TMP13:%.*]] = fadd double [[TMP11]], [[TMP12]]
1073; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false)
1074; IR-DPP-NEXT:    [[TMP15:%.*]] = fadd double [[TMP13]], [[TMP14]]
1075; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false)
1076; IR-DPP-NEXT:    [[TMP17:%.*]] = fadd double [[TMP15]], [[TMP16]]
1077; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false)
1078; IR-DPP-NEXT:    [[TMP19:%.*]] = fadd double [[TMP17]], [[TMP18]]
1079; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false)
1080; IR-DPP-NEXT:    [[TMP21:%.*]] = fadd double [[TMP19]], [[TMP20]]
1081; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false)
1082; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63)
1083; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]])
1084; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1085; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1086; IR-DPP:       26:
1087; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 4
1088; IR-DPP-NEXT:    br label [[TMP28]]
1089; IR-DPP:       28:
1090; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1091; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]])
1092; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]])
1093; IR-DPP-NEXT:    [[TMP32:%.*]] = fadd double [[TMP30]], [[TMP31]]
1094; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1095; IR-DPP-NEXT:    br label [[TMP34]]
1096; IR-DPP:       34:
1097; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1098; IR-DPP-NEXT:    ret double [[TMP35]]
1099;
1100  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
1101  ret double %result
1102}
1103
1104define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 {
1105; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(
1106; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1107; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1108; IR-ITERATIVE:       2:
1109; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1110; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1111; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1112; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1113; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1114; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1115; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
1116; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1117; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1118; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1119; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1120; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1121; IR-ITERATIVE:       14:
1122; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8
1123; IR-ITERATIVE-NEXT:    br label [[TMP16]]
1124; IR-ITERATIVE:       16:
1125; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1126; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
1127; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1128; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1129; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1130; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1131; IR-ITERATIVE-NEXT:    br label [[TMP23]]
1132; IR-ITERATIVE:       23:
1133; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1134; IR-ITERATIVE-NEXT:    ret double [[TMP24]]
1135;
1136; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(
1137; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1138; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1139; IR-DPP:       2:
1140; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1141; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1142; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1143; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1144; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1145; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1146; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
1147; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1148; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1149; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1150; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1151; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1152; IR-DPP:       14:
1153; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8
1154; IR-DPP-NEXT:    br label [[TMP16]]
1155; IR-DPP:       16:
1156; IR-DPP-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1157; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
1158; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1159; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1160; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1161; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1162; IR-DPP-NEXT:    br label [[TMP23]]
1163; IR-DPP:       23:
1164; IR-DPP-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1165; IR-DPP-NEXT:    ret double [[TMP24]]
1166;
1167  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
1168  ret double %result
1169}
1170
1171define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 {
1172; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(
1173; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1174; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1175; IR-ITERATIVE:       2:
1176; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1177; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1178; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1179; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1180; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1181; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1182; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1183; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1184; IR-ITERATIVE:       10:
1185; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("one-as") monotonic, align 8
1186; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1187; IR-ITERATIVE:       12:
1188; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1189; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
1190; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1191; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1192; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1193; IR-ITERATIVE:       17:
1194; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1195; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1196; IR-ITERATIVE:       ComputeLoop:
1197; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1198; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1199; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1200; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
1201; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1202; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
1203; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
1204; IR-ITERATIVE-NEXT:    [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1205; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1206; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1207; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1208; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1209; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1210; IR-ITERATIVE:       ComputeEnd:
1211; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1212; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1213;
1214; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(
1215; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1216; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1217; IR-DPP:       2:
1218; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1219; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1220; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1221; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1222; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1223; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1224; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
1225; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
1226; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1227; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
1228; IR-DPP-NEXT:    [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1229; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
1230; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1231; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
1232; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1233; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
1234; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1235; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
1236; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1237; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
1238; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
1239; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
1240; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1241; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1242; IR-DPP:       26:
1243; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("one-as") monotonic, align 8
1244; IR-DPP-NEXT:    br label [[TMP28]]
1245; IR-DPP:       28:
1246; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1247; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
1248; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
1249; IR-DPP-NEXT:    [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1250; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1251; IR-DPP-NEXT:    br label [[TMP34]]
1252; IR-DPP:       34:
1253; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1254; IR-DPP-NEXT:    ret double [[TMP35]]
1255;
1256  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
1257  ret double %result
1258}
1259
1260define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 {
1261; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(
1262; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1263; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1264; IR-ITERATIVE:       2:
1265; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1266; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1267; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1268; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1269; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1270; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1271; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
1272; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1273; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1274; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1275; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1276; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1277; IR-ITERATIVE:       14:
1278; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8
1279; IR-ITERATIVE-NEXT:    br label [[TMP16]]
1280; IR-ITERATIVE:       16:
1281; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1282; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
1283; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1284; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1285; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1286; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1287; IR-ITERATIVE-NEXT:    br label [[TMP23]]
1288; IR-ITERATIVE:       23:
1289; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1290; IR-ITERATIVE-NEXT:    ret double [[TMP24]]
1291;
1292; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(
1293; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1294; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1295; IR-DPP:       2:
1296; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1297; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1298; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1299; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1300; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1301; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1302; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
1303; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1304; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1305; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1306; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1307; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1308; IR-DPP:       14:
1309; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8
1310; IR-DPP-NEXT:    br label [[TMP16]]
1311; IR-DPP:       16:
1312; IR-DPP-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1313; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
1314; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1315; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1316; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1317; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1318; IR-DPP-NEXT:    br label [[TMP23]]
1319; IR-DPP:       23:
1320; IR-DPP-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1321; IR-DPP-NEXT:    ret double [[TMP24]]
1322;
1323  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1324  ret double %result
1325}
1326
1327define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 {
1328; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(
1329; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1330; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1331; IR-ITERATIVE:       2:
1332; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1333; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1334; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1335; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1336; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1337; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1338; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1339; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1340; IR-ITERATIVE:       10:
1341; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
1342; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1343; IR-ITERATIVE:       12:
1344; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1345; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
1346; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1347; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1348; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1349; IR-ITERATIVE:       17:
1350; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1351; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1352; IR-ITERATIVE:       ComputeLoop:
1353; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1354; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1355; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1356; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
1357; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1358; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
1359; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
1360; IR-ITERATIVE-NEXT:    [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1361; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1362; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1363; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1364; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1365; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1366; IR-ITERATIVE:       ComputeEnd:
1367; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1368; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1369;
1370; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(
1371; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1372; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1373; IR-DPP:       2:
1374; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1375; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1376; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1377; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1378; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1379; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1380; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
1381; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
1382; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1383; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
1384; IR-DPP-NEXT:    [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1385; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
1386; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1387; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
1388; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1389; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
1390; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1391; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
1392; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1393; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
1394; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
1395; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
1396; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1397; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1398; IR-DPP:       26:
1399; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
1400; IR-DPP-NEXT:    br label [[TMP28]]
1401; IR-DPP:       28:
1402; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1403; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
1404; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
1405; IR-DPP-NEXT:    [[TMP32:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1406; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1407; IR-DPP-NEXT:    br label [[TMP34]]
1408; IR-DPP:       34:
1409; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1410; IR-DPP-NEXT:    ret double [[TMP35]]
1411;
1412  %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1413  ret double %result
1414}
1415
1416define amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 {
1417; IR-LABEL: @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(
1418; IR-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
1419; IR-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
1420; IR:       2:
1421; IR-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1422; IR-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1423; IR-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1424; IR-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1425; IR-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
1426; IR-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
1427; IR-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
1428; IR-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
1429; IR:       10:
1430; IR-NEXT:    [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1431; IR-NEXT:    br label [[TMP12]]
1432; IR:       12:
1433; IR-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
1434; IR-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
1435; IR-NEXT:    [[TMP15:%.*]] = uitofp i32 [[TMP8]] to double
1436; IR-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
1437; IR-NEXT:    [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP16]])
1438; IR-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
1439; IR-NEXT:    br label [[TMP19]]
1440; IR:       19:
1441; IR-NEXT:    [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
1442; IR-NEXT:    ret double [[TMP20]]
1443;
1444  %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1445  ret double %result
1446}
1447
1448define amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 {
1449; IR-ITERATIVE-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(
1450; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
1451; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1452; IR-ITERATIVE:       2:
1453; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1454; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1455; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1456; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1457; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
1458; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
1459; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1460; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1461; IR-ITERATIVE:       10:
1462; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
1463; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1464; IR-ITERATIVE:       12:
1465; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1466; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
1467; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP22:%.*]])
1468; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1469; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1470; IR-ITERATIVE:       17:
1471; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1472; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1473; IR-ITERATIVE:       ComputeLoop:
1474; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1475; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1476; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1477; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
1478; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1479; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]])
1480; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]])
1481; IR-ITERATIVE-NEXT:    [[TMP23]] = call double @llvm.minnum.f64(double [[ACCUMULATOR]], double [[TMP21]])
1482; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1483; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1484; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1485; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1486; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1487; IR-ITERATIVE:       ComputeEnd:
1488; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1489; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1490;
1491; IR-DPP-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(
1492; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
1493; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1494; IR-DPP:       2:
1495; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
1496; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1497; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1498; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1499; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
1500; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
1501; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000)
1502; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false)
1503; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.minnum.f64(double [[TMP9]], double [[TMP10]])
1504; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false)
1505; IR-DPP-NEXT:    [[TMP13:%.*]] = call double @llvm.minnum.f64(double [[TMP11]], double [[TMP12]])
1506; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false)
1507; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP13]], double [[TMP14]])
1508; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false)
1509; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP15]], double [[TMP16]])
1510; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false)
1511; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.minnum.f64(double [[TMP17]], double [[TMP18]])
1512; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false)
1513; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.minnum.f64(double [[TMP19]], double [[TMP20]])
1514; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false)
1515; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63)
1516; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]])
1517; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1518; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1519; IR-DPP:       26:
1520; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
1521; IR-DPP-NEXT:    br label [[TMP28]]
1522; IR-DPP:       28:
1523; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1524; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]])
1525; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]])
1526; IR-DPP-NEXT:    [[TMP32:%.*]] = call double @llvm.minnum.f64(double [[TMP30]], double [[TMP31]])
1527; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1528; IR-DPP-NEXT:    br label [[TMP34]]
1529; IR-DPP:       34:
1530; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1531; IR-DPP-NEXT:    ret double [[TMP35]]
1532;
1533  %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1534  ret double %result
1535}
1536
1537define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{
1538; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(
1539; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1540; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
1541; IR-ITERATIVE:       2:
1542; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1543; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1544; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1545; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1546; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1547; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1548; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
1549; IR-ITERATIVE-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
1550; IR-ITERATIVE:       10:
1551; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1552; IR-ITERATIVE-NEXT:    br label [[TMP12]]
1553; IR-ITERATIVE:       12:
1554; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
1555; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
1556; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1557; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
1558; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]]
1559; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
1560; IR-ITERATIVE-NEXT:    br label [[TMP19]]
1561; IR-ITERATIVE:       19:
1562; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
1563; IR-ITERATIVE-NEXT:    ret double [[TMP20]]
1564;
1565; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(
1566; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1567; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
1568; IR-DPP:       2:
1569; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1570; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1571; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1572; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1573; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1574; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1575; IR-DPP-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
1576; IR-DPP-NEXT:    br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
1577; IR-DPP:       10:
1578; IR-DPP-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1579; IR-DPP-NEXT:    br label [[TMP12]]
1580; IR-DPP:       12:
1581; IR-DPP-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
1582; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]]
1583; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1584; IR-DPP-NEXT:    [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
1585; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
1586; IR-DPP-NEXT:    [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
1587; IR-DPP-NEXT:    br label [[TMP19]]
1588; IR-DPP:       19:
1589; IR-DPP-NEXT:    [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
1590; IR-DPP-NEXT:    ret double [[TMP20]]
1591;
1592  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1593  ret double %result
1594}
1595
1596define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{
1597; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(
1598; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1599; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1600; IR-ITERATIVE:       2:
1601; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1602; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1603; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1604; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1605; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1606; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1607; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1608; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1609; IR-ITERATIVE:       10:
1610; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
1611; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1612; IR-ITERATIVE:       12:
1613; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1614; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
1615; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]]
1616; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1617; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1618; IR-ITERATIVE:       17:
1619; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1620; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1621; IR-ITERATIVE:       ComputeLoop:
1622; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1623; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1624; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1625; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
1626; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1627; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
1628; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
1629; IR-ITERATIVE-NEXT:    [[TMP23]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]]
1630; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1631; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1632; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1633; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1634; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1635; IR-ITERATIVE:       ComputeEnd:
1636; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1637; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1638;
1639; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(
1640; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1641; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1642; IR-DPP:       2:
1643; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1644; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1645; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1646; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1647; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1648; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1649; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]]
1650; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
1651; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]]
1652; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
1653; IR-DPP-NEXT:    [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]]
1654; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
1655; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]]
1656; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
1657; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
1658; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
1659; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]]
1660; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
1661; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]]
1662; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
1663; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
1664; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
1665; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1666; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1667; IR-DPP:       26:
1668; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
1669; IR-DPP-NEXT:    br label [[TMP28]]
1670; IR-DPP:       28:
1671; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1672; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
1673; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
1674; IR-DPP-NEXT:    [[TMP32:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP30]], double [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]]
1675; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1676; IR-DPP-NEXT:    br label [[TMP34]]
1677; IR-DPP:       34:
1678; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1679; IR-DPP-NEXT:    ret double [[TMP35]]
1680;
1681  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1682  ret double %result
1683}
1684
1685define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 {
1686; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(
1687; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1688; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1689; IR-ITERATIVE:       2:
1690; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1691; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1692; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1693; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1694; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1695; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1696; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
1697; IR-ITERATIVE-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1698; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1699; IR-ITERATIVE-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1700; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1701; IR-ITERATIVE-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1702; IR-ITERATIVE:       14:
1703; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4
1704; IR-ITERATIVE-NEXT:    br label [[TMP16]]
1705; IR-ITERATIVE:       16:
1706; IR-ITERATIVE-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1707; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
1708; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1709; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1710; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1711; IR-ITERATIVE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1712; IR-ITERATIVE-NEXT:    br label [[TMP23]]
1713; IR-ITERATIVE:       23:
1714; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1715; IR-ITERATIVE-NEXT:    ret double [[TMP24]]
1716;
1717; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(
1718; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1719; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
1720; IR-DPP:       2:
1721; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1722; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1723; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1724; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1725; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1726; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1727; IR-DPP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
1728; IR-DPP-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1729; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1730; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1731; IR-DPP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
1732; IR-DPP-NEXT:    br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
1733; IR-DPP:       14:
1734; IR-DPP-NEXT:    [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4
1735; IR-DPP-NEXT:    br label [[TMP16]]
1736; IR-DPP:       16:
1737; IR-DPP-NEXT:    [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
1738; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
1739; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1740; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1741; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1742; IR-DPP-NEXT:    [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
1743; IR-DPP-NEXT:    br label [[TMP23]]
1744; IR-DPP:       23:
1745; IR-DPP-NEXT:    [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
1746; IR-DPP-NEXT:    ret double [[TMP24]]
1747;
1748  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
1749  ret double %result
1750}
1751
1752define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 {
1753; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(
1754; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
1755; IR-ITERATIVE-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
1756; IR-ITERATIVE:       2:
1757; IR-ITERATIVE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1758; IR-ITERATIVE-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1759; IR-ITERATIVE-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1760; IR-ITERATIVE-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1761; IR-ITERATIVE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
1762; IR-ITERATIVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
1763; IR-ITERATIVE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
1764; IR-ITERATIVE-NEXT:    br label [[COMPUTELOOP:%.*]]
1765; IR-ITERATIVE:       10:
1766; IR-ITERATIVE-NEXT:    [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] monotonic, align 4
1767; IR-ITERATIVE-NEXT:    br label [[TMP12:%.*]]
1768; IR-ITERATIVE:       12:
1769; IR-ITERATIVE-NEXT:    [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
1770; IR-ITERATIVE-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
1771; IR-ITERATIVE-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1772; IR-ITERATIVE-NEXT:    [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
1773; IR-ITERATIVE-NEXT:    br label [[TMP17]]
1774; IR-ITERATIVE:       17:
1775; IR-ITERATIVE-NEXT:    [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
1776; IR-ITERATIVE-NEXT:    ret double [[TMP18]]
1777; IR-ITERATIVE:       ComputeLoop:
1778; IR-ITERATIVE-NEXT:    [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
1779; IR-ITERATIVE-NEXT:    [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
1780; IR-ITERATIVE-NEXT:    [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
1781; IR-ITERATIVE-NEXT:    [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
1782; IR-ITERATIVE-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
1783; IR-ITERATIVE-NEXT:    [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
1784; IR-ITERATIVE-NEXT:    [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
1785; IR-ITERATIVE-NEXT:    [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
1786; IR-ITERATIVE-NEXT:    [[TMP24:%.*]] = shl i64 1, [[TMP19]]
1787; IR-ITERATIVE-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], -1
1788; IR-ITERATIVE-NEXT:    [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
1789; IR-ITERATIVE-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
1790; IR-ITERATIVE-NEXT:    br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
1791; IR-ITERATIVE:       ComputeEnd:
1792; IR-ITERATIVE-NEXT:    [[TMP28]] = icmp eq i32 [[TMP8]], 0
1793; IR-ITERATIVE-NEXT:    br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
1794;
1795; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(
1796; IR-DPP-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
1797; IR-DPP-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
1798; IR-DPP:       2:
1799; IR-DPP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
1800; IR-DPP-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1801; IR-DPP-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
1802; IR-DPP-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
1803; IR-DPP-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
1804; IR-DPP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
1805; IR-DPP-NEXT:    [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
1806; IR-DPP-NEXT:    [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
1807; IR-DPP-NEXT:    [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1808; IR-DPP-NEXT:    [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
1809; IR-DPP-NEXT:    [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1810; IR-DPP-NEXT:    [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
1811; IR-DPP-NEXT:    [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1812; IR-DPP-NEXT:    [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
1813; IR-DPP-NEXT:    [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1814; IR-DPP-NEXT:    [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
1815; IR-DPP-NEXT:    [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1816; IR-DPP-NEXT:    [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
1817; IR-DPP-NEXT:    [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1818; IR-DPP-NEXT:    [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
1819; IR-DPP-NEXT:    [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
1820; IR-DPP-NEXT:    [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
1821; IR-DPP-NEXT:    [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
1822; IR-DPP-NEXT:    br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
1823; IR-DPP:       26:
1824; IR-DPP-NEXT:    [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] monotonic, align 4
1825; IR-DPP-NEXT:    br label [[TMP28]]
1826; IR-DPP:       28:
1827; IR-DPP-NEXT:    [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
1828; IR-DPP-NEXT:    [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
1829; IR-DPP-NEXT:    [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
1830; IR-DPP-NEXT:    [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1831; IR-DPP-NEXT:    [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
1832; IR-DPP-NEXT:    br label [[TMP34]]
1833; IR-DPP:       34:
1834; IR-DPP-NEXT:    [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
1835; IR-DPP-NEXT:    ret double [[TMP35]]
1836;
1837  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
1838  ret double %result
1839}
1840
1841define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double inreg %val) #0 {
1842; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(
1843; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4
1844; IR-NEXT:    ret double [[RESULT]]
1845;
1846  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
1847  ret double %result
1848}
1849
1850define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double %val) #0 {
1851; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(
1852; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4
1853; IR-NEXT:    ret double [[RESULT]]
1854;
1855  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
1856  ret double %result
1857}
1858
1859define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1 {
1860; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(
1861; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8
1862; IR-NEXT:    ret double [[RESULT]]
1863;
1864  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
1865  ret double %result
1866}
1867
1868define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1 {
1869; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(
1870; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8
1871; IR-NEXT:    ret double [[RESULT]]
1872;
1873  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
1874  ret double %result
1875}
1876
1877define amdgpu_ps double @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 {
1878; IR-LABEL: @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(
1879; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1880; IR-NEXT:    ret double [[RESULT]]
1881;
1882  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1883  ret double %result
1884}
1885
1886define amdgpu_ps double @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 {
1887; IR-LABEL: @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(
1888; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1889; IR-NEXT:    ret double [[RESULT]]
1890;
1891  %result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1892  ret double %result
1893}
1894
1895define amdgpu_ps double @global_atomic_fmin_double_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, double inreg %val) #0 {
1896; IR-LABEL: @global_atomic_fmin_double_div_address_uni_value_agent_scope(
1897; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1898; IR-NEXT:    ret double [[RESULT]]
1899;
1900  %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1901  ret double %result
1902}
1903
1904define amdgpu_ps double @global_atomic_fmin_double_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, double %val) #0 {
1905; IR-LABEL: @global_atomic_fmin_double_div_address_div_value_agent_scope(
1906; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1907; IR-NEXT:    ret double [[RESULT]]
1908;
1909  %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1910  ret double %result
1911}
1912
1913define amdgpu_ps double @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1{
1914; IR-LABEL: @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(
1915; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1916; IR-NEXT:    ret double [[RESULT]]
1917;
1918  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1919  ret double %result
1920}
1921
1922define amdgpu_ps double @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1{
1923; IR-LABEL: @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(
1924; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
1925; IR-NEXT:    ret double [[RESULT]]
1926;
1927  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
1928  ret double %result
1929}
1930
1931define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 {
1932; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(
1933; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4
1934; IR-NEXT:    ret double [[RESULT]]
1935;
1936  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
1937  ret double %result
1938}
1939
1940define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 {
1941; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(
1942; IR-NEXT:    [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4
1943; IR-NEXT:    ret double [[RESULT]]
1944;
1945  %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
1946  ret double %result
1947}
1948
1949attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
1950attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
1951attributes #2 = { strictfp }
1952