xref: /llvm-project/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (revision eeac0ffaf46cf9f9b0f680b9940cc4b68a0286d8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s
8; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS-DPP %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DPP %s
10; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064-DPP %s
11; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032-DPP %s
12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164-DPP %s
13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132-DPP %s
14
; External helpers with no body in this file. The return type of each one
; matches the type spelled in its name (float / double).
; NOTE(review): presumably called by the *_div_value_* kernels to obtain a
; value the compiler cannot treat as uniform - confirm against the callers.
15declare float @div.float.value()
16declare double @div.double.value()
17
; Kernel under test: a single atomicrmw fmax of the constant 4.0 on the
; kernel's global-memory (addrspace 1) pointer argument, at agent scope with
; monotonic ordering.
;
; The assertions below are autogenerated, one section per RUN configuration;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand. What they show for this kernel:
;   - every configuration first computes a per-lane count with v_mbcnt and
;     restricts execution to lanes whose count is 0 before touching memory;
;   - the default and gfx900 runs expand fmax into a compare-and-swap retry
;     loop (%atomicrmw.start), while the gfx1010 and gfx1100 runs emit a single
;     global_atomic_fmax / global_atomic_max_f32;
;   - the Iterative and DPP strategy sections are instruction-for-instruction
;     the same for this kernel.
18define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
19; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
20; GFX7LESS:       ; %bb.0:
21; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
22; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
23; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
24; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
25; GFX7LESS-NEXT:    s_cbranch_execz .LBB0_3
26; GFX7LESS-NEXT:  ; %bb.1:
27; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
28; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
29; GFX7LESS-NEXT:    s_load_dword s2, s[0:1], 0x0
30; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
31; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
32; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
33; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s2
34; GFX7LESS-NEXT:    s_mov_b32 s2, -1
35; GFX7LESS-NEXT:  .LBB0_2: ; %atomicrmw.start
36; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
37; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
38; GFX7LESS-NEXT:    v_max_f32_e32 v0, 4.0, v0
39; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
40; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v1
41; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v0
42; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
43; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
44; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
45; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
46; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v2
47; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
48; GFX7LESS-NEXT:    s_cbranch_execnz .LBB0_2
49; GFX7LESS-NEXT:  .LBB0_3:
50; GFX7LESS-NEXT:    s_endpgm
51;
52; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
53; GFX9:       ; %bb.0:
54; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
55; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
56; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
57; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
58; GFX9-NEXT:    s_cbranch_execz .LBB0_3
59; GFX9-NEXT:  ; %bb.1:
60; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
61; GFX9-NEXT:    s_mov_b64 s[2:3], 0
62; GFX9-NEXT:    v_mov_b32_e32 v2, 0
63; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
64; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x0
65; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
66; GFX9-NEXT:    v_mov_b32_e32 v1, s4
67; GFX9-NEXT:  .LBB0_2: ; %atomicrmw.start
68; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
69; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
70; GFX9-NEXT:    v_max_f32_e32 v0, 4.0, v0
71; GFX9-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
72; GFX9-NEXT:    s_waitcnt vmcnt(0)
73; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
74; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
75; GFX9-NEXT:    v_mov_b32_e32 v1, v0
76; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
77; GFX9-NEXT:    s_cbranch_execnz .LBB0_2
78; GFX9-NEXT:  .LBB0_3:
79; GFX9-NEXT:    s_endpgm
80;
81; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
82; GFX1064:       ; %bb.0:
83; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
84; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
85; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
86; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
87; GFX1064-NEXT:    s_cbranch_execz .LBB0_2
88; GFX1064-NEXT:  ; %bb.1:
89; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
90; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
91; GFX1064-NEXT:    v_mov_b32_e32 v1, 4.0
92; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
93; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
94; GFX1064-NEXT:  .LBB0_2:
95; GFX1064-NEXT:    s_endpgm
96;
97; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
98; GFX1032:       ; %bb.0:
99; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
100; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
101; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
102; GFX1032-NEXT:    s_cbranch_execz .LBB0_2
103; GFX1032-NEXT:  ; %bb.1:
104; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
105; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
106; GFX1032-NEXT:    v_mov_b32_e32 v1, 4.0
107; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
108; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
109; GFX1032-NEXT:  .LBB0_2:
110; GFX1032-NEXT:    s_endpgm
111;
112; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
113; GFX1164:       ; %bb.0:
114; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
115; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
116; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
117; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
118; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
119; GFX1164-NEXT:    s_cbranch_execz .LBB0_2
120; GFX1164-NEXT:  ; %bb.1:
121; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
122; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
123; GFX1164-NEXT:    v_mov_b32_e32 v1, 4.0
124; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
125; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
126; GFX1164-NEXT:  .LBB0_2:
127; GFX1164-NEXT:    s_endpgm
128;
129; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
130; GFX1132:       ; %bb.0:
131; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
132; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
133; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
134; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
135; GFX1132-NEXT:    s_cbranch_execz .LBB0_2
136; GFX1132-NEXT:  ; %bb.1:
137; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
138; GFX1132-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
139; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
140; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
141; GFX1132-NEXT:  .LBB0_2:
142; GFX1132-NEXT:    s_endpgm
143;
144; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
145; GFX7LESS-DPP:       ; %bb.0:
146; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
147; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
148; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
149; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
150; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB0_3
151; GFX7LESS-DPP-NEXT:  ; %bb.1:
152; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
153; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
154; GFX7LESS-DPP-NEXT:    s_load_dword s2, s[0:1], 0x0
155; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
156; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
157; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
158; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s2
159; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
160; GFX7LESS-DPP-NEXT:  .LBB0_2: ; %atomicrmw.start
161; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
162; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
163; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
164; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
165; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v1
166; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v0
167; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
168; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
169; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
170; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
171; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v2
172; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
173; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB0_2
174; GFX7LESS-DPP-NEXT:  .LBB0_3:
175; GFX7LESS-DPP-NEXT:    s_endpgm
176;
177; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
178; GFX9-DPP:       ; %bb.0:
179; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
180; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
181; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
182; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
183; GFX9-DPP-NEXT:    s_cbranch_execz .LBB0_3
184; GFX9-DPP-NEXT:  ; %bb.1:
185; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
186; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
187; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
188; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
189; GFX9-DPP-NEXT:    s_load_dword s4, s[0:1], 0x0
190; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
191; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s4
192; GFX9-DPP-NEXT:  .LBB0_2: ; %atomicrmw.start
193; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
194; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
195; GFX9-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
196; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
197; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
198; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
199; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
200; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
201; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
202; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB0_2
203; GFX9-DPP-NEXT:  .LBB0_3:
204; GFX9-DPP-NEXT:    s_endpgm
205;
206; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
207; GFX1064-DPP:       ; %bb.0:
208; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
209; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
210; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
211; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
212; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB0_2
213; GFX1064-DPP-NEXT:  ; %bb.1:
214; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
215; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
216; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
217; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
218; GFX1064-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
219; GFX1064-DPP-NEXT:  .LBB0_2:
220; GFX1064-DPP-NEXT:    s_endpgm
221;
222; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
223; GFX1032-DPP:       ; %bb.0:
224; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
225; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
226; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
227; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB0_2
228; GFX1032-DPP-NEXT:  ; %bb.1:
229; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
230; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
231; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
232; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
233; GFX1032-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
234; GFX1032-DPP-NEXT:  .LBB0_2:
235; GFX1032-DPP-NEXT:    s_endpgm
236;
237; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
238; GFX1164-DPP:       ; %bb.0:
239; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
240; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
241; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
242; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
243; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
244; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB0_2
245; GFX1164-DPP-NEXT:  ; %bb.1:
246; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
247; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
248; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
249; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
250; GFX1164-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
251; GFX1164-DPP-NEXT:  .LBB0_2:
252; GFX1164-DPP-NEXT:    s_endpgm
253;
254; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
255; GFX1132-DPP:       ; %bb.0:
256; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
257; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
258; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
259; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
260; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB0_2
261; GFX1132-DPP-NEXT:  ; %bb.1:
262; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
263; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
264; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
265; GFX1132-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
266; GFX1132-DPP-NEXT:  .LBB0_2:
267; GFX1132-DPP-NEXT:    s_endpgm
  ; The returned old value (%result) is never used before the ret below.
  ; The !1 metadata node and attribute group #0 are defined outside this
  ; function.
268  %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
269  ret void
270}
271
272define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
273; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
274; GFX7LESS:       ; %bb.0:
275; GFX7LESS-NEXT:    s_mov_b32 s32, 0
276; GFX7LESS-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
277; GFX7LESS-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
278; GFX7LESS-NEXT:    s_mov_b32 s38, -1
279; GFX7LESS-NEXT:    s_mov_b32 s39, 0xe8f000
280; GFX7LESS-NEXT:    s_add_u32 s36, s36, s11
281; GFX7LESS-NEXT:    s_addc_u32 s37, s37, 0
282; GFX7LESS-NEXT:    s_mov_b32 s14, s10
283; GFX7LESS-NEXT:    s_mov_b32 s13, s9
284; GFX7LESS-NEXT:    s_mov_b32 s12, s8
285; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[6:7]
286; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[4:5]
287; GFX7LESS-NEXT:    s_add_u32 s8, s34, 44
288; GFX7LESS-NEXT:    s_addc_u32 s9, s35, 0
289; GFX7LESS-NEXT:    s_getpc_b64 s[4:5]
290; GFX7LESS-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
291; GFX7LESS-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
292; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
293; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
294; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
295; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
296; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
297; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
298; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
299; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
300; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[38:39]
301; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
302; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
303; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
304; GFX7LESS-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
305; GFX7LESS-NEXT:  .LBB1_1: ; %ComputeLoop
306; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
307; GFX7LESS-NEXT:    s_ff1_i32_b64 s2, s[0:1]
308; GFX7LESS-NEXT:    v_mul_f32_e32 v1, 1.0, v2
309; GFX7LESS-NEXT:    v_readlane_b32 s4, v0, s2
310; GFX7LESS-NEXT:    s_lshl_b64 s[2:3], 1, s2
311; GFX7LESS-NEXT:    v_mul_f32_e64 v2, 1.0, s4
312; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
313; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
314; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
315; GFX7LESS-NEXT:    v_max_f32_e32 v2, v1, v2
316; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB1_1
317; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
318; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
319; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
320; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
321; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
322; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
323; GFX7LESS-NEXT:    s_cbranch_execz .LBB1_5
324; GFX7LESS-NEXT:  ; %bb.3:
325; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x9
326; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
327; GFX7LESS-NEXT:    s_mov_b32 s2, -1
328; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
329; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[0:3], 0
330; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
331; GFX7LESS-NEXT:    v_mul_f32_e32 v2, 1.0, v2
332; GFX7LESS-NEXT:  .LBB1_4: ; %atomicrmw.start
333; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
334; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
335; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
336; GFX7LESS-NEXT:    v_max_f32_e32 v0, v0, v2
337; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
338; GFX7LESS-NEXT:    v_mov_b32_e32 v4, v1
339; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v0
340; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc
341; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
342; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
343; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
344; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v3
345; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
346; GFX7LESS-NEXT:    s_cbranch_execnz .LBB1_4
347; GFX7LESS-NEXT:  .LBB1_5:
348; GFX7LESS-NEXT:    s_endpgm
349;
350; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
351; GFX9:       ; %bb.0:
352; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
353; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
354; GFX9-NEXT:    s_mov_b32 s38, -1
355; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
356; GFX9-NEXT:    s_add_u32 s36, s36, s11
357; GFX9-NEXT:    s_addc_u32 s37, s37, 0
358; GFX9-NEXT:    s_mov_b64 s[34:35], s[4:5]
359; GFX9-NEXT:    s_mov_b32 s12, s8
360; GFX9-NEXT:    s_add_u32 s8, s34, 44
361; GFX9-NEXT:    s_mov_b32 s13, s9
362; GFX9-NEXT:    s_addc_u32 s9, s35, 0
363; GFX9-NEXT:    s_getpc_b64 s[4:5]
364; GFX9-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
365; GFX9-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
366; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
367; GFX9-NEXT:    s_mov_b32 s14, s10
368; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
369; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
370; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
371; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
372; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
373; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
374; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
375; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
376; GFX9-NEXT:    s_mov_b32 s32, 0
377; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
379; GFX9-NEXT:    s_mov_b64 s[0:1], exec
380; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
381; GFX9-NEXT:  .LBB1_1: ; %ComputeLoop
382; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
383; GFX9-NEXT:    s_ff1_i32_b64 s2, s[0:1]
384; GFX9-NEXT:    v_readlane_b32 s4, v0, s2
385; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s2
386; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
387; GFX9-NEXT:    v_max_f32_e64 v2, s4, s4
388; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
389; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
390; GFX9-NEXT:    v_max_f32_e32 v2, v1, v2
391; GFX9-NEXT:    s_cbranch_scc1 .LBB1_1
392; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
393; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
394; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
395; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
396; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
397; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
398; GFX9-NEXT:    s_cbranch_execz .LBB1_5
399; GFX9-NEXT:  ; %bb.3:
400; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
401; GFX9-NEXT:    v_mov_b32_e32 v3, 0
402; GFX9-NEXT:    s_mov_b64 s[2:3], 0
403; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
404; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
405; GFX9-NEXT:    global_load_dword v1, v3, s[0:1]
406; GFX9-NEXT:  .LBB1_4: ; %atomicrmw.start
407; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
408; GFX9-NEXT:    s_waitcnt vmcnt(0)
409; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
410; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
411; GFX9-NEXT:    global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc
412; GFX9-NEXT:    s_waitcnt vmcnt(0)
413; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
414; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
415; GFX9-NEXT:    v_mov_b32_e32 v1, v0
416; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
417; GFX9-NEXT:    s_cbranch_execnz .LBB1_4
418; GFX9-NEXT:  .LBB1_5:
419; GFX9-NEXT:    s_endpgm
420;
421; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
422; GFX1064:       ; %bb.0:
423; GFX1064-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
424; GFX1064-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
425; GFX1064-NEXT:    s_mov_b32 s38, -1
426; GFX1064-NEXT:    s_mov_b32 s39, 0x31e16000
427; GFX1064-NEXT:    s_add_u32 s36, s36, s11
428; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
429; GFX1064-NEXT:    s_addc_u32 s37, s37, 0
430; GFX1064-NEXT:    s_mov_b32 s12, s8
431; GFX1064-NEXT:    s_add_u32 s8, s34, 44
432; GFX1064-NEXT:    s_mov_b32 s13, s9
433; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
434; GFX1064-NEXT:    s_getpc_b64 s[4:5]
435; GFX1064-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
436; GFX1064-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
437; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
438; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
439; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
440; GFX1064-NEXT:    s_mov_b32 s14, s10
441; GFX1064-NEXT:    s_mov_b64 s[10:11], s[6:7]
442; GFX1064-NEXT:    s_mov_b64 s[4:5], s[0:1]
443; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
444; GFX1064-NEXT:    v_or3_b32 v31, v0, v1, v2
445; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
446; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
447; GFX1064-NEXT:    s_mov_b32 s32, 0
448; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
449; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
450; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
451; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
452; GFX1064-NEXT:  .LBB1_1: ; %ComputeLoop
453; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
454; GFX1064-NEXT:    s_ff1_i32_b64 s2, s[0:1]
455; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v1
456; GFX1064-NEXT:    v_readlane_b32 s3, v0, s2
457; GFX1064-NEXT:    v_max_f32_e64 v2, s3, s3
458; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s2
459; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
460; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
461; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v2
462; GFX1064-NEXT:    s_cbranch_scc1 .LBB1_1
463; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
464; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
465; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
466; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
467; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
468; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
469; GFX1064-NEXT:    s_cbranch_execz .LBB1_4
470; GFX1064-NEXT:  ; %bb.3:
471; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
472; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
473; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
474; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
475; GFX1064-NEXT:  .LBB1_4:
476; GFX1064-NEXT:    s_endpgm
477;
478; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
479; GFX1032:       ; %bb.0:
480; GFX1032-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
481; GFX1032-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
482; GFX1032-NEXT:    s_mov_b32 s38, -1
483; GFX1032-NEXT:    s_mov_b32 s39, 0x31c16000
484; GFX1032-NEXT:    s_add_u32 s36, s36, s11
485; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
486; GFX1032-NEXT:    s_addc_u32 s37, s37, 0
487; GFX1032-NEXT:    s_mov_b32 s12, s8
488; GFX1032-NEXT:    s_add_u32 s8, s34, 44
489; GFX1032-NEXT:    s_mov_b32 s13, s9
490; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
491; GFX1032-NEXT:    s_getpc_b64 s[4:5]
492; GFX1032-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
493; GFX1032-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
494; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
495; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
496; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
497; GFX1032-NEXT:    s_mov_b32 s14, s10
498; GFX1032-NEXT:    s_mov_b64 s[10:11], s[6:7]
499; GFX1032-NEXT:    s_mov_b64 s[4:5], s[0:1]
500; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
501; GFX1032-NEXT:    v_or3_b32 v31, v0, v1, v2
502; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
503; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
504; GFX1032-NEXT:    s_mov_b32 s32, 0
505; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
506; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
507; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
508; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
509; GFX1032-NEXT:  .LBB1_1: ; %ComputeLoop
510; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
511; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
512; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v1
513; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
514; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
515; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
516; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
517; GFX1032-NEXT:    v_max_f32_e64 v2, s2, s2
518; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v2
519; GFX1032-NEXT:    s_cbranch_scc1 .LBB1_1
520; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
521; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
522; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
523; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
524; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
525; GFX1032-NEXT:    s_cbranch_execz .LBB1_4
526; GFX1032-NEXT:  ; %bb.3:
527; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
528; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
529; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
530; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
531; GFX1032-NEXT:  .LBB1_4:
532; GFX1032-NEXT:    s_endpgm
533;
534; GFX1164-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
535; GFX1164:       ; %bb.0:
536; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
537; GFX1164-NEXT:    s_mov_b32 s12, s8
538; GFX1164-NEXT:    s_add_u32 s8, s34, 44
539; GFX1164-NEXT:    s_mov_b32 s13, s9
540; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
541; GFX1164-NEXT:    s_getpc_b64 s[4:5]
542; GFX1164-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
543; GFX1164-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
544; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
545; GFX1164-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
546; GFX1164-NEXT:    s_mov_b32 s14, s10
547; GFX1164-NEXT:    s_mov_b64 s[10:11], s[6:7]
548; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
549; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
550; GFX1164-NEXT:    s_mov_b32 s32, 0
551; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
552; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
553; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
554; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
555; GFX1164-NEXT:  .LBB1_1: ; %ComputeLoop
556; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
557; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
558; GFX1164-NEXT:    s_ctz_i32_b64 s2, s[0:1]
559; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v1
560; GFX1164-NEXT:    v_readlane_b32 s3, v0, s2
561; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
562; GFX1164-NEXT:    v_max_f32_e64 v2, s3, s3
563; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s2
564; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
565; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
566; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
567; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v2
568; GFX1164-NEXT:    s_cbranch_scc1 .LBB1_1
569; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
570; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
571; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
572; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
573; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
574; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
575; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
576; GFX1164-NEXT:    s_cbranch_execz .LBB1_4
577; GFX1164-NEXT:  ; %bb.3:
578; GFX1164-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
579; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
580; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
581; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
582; GFX1164-NEXT:  .LBB1_4:
583; GFX1164-NEXT:    s_endpgm
584;
585; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
586; GFX1132:       ; %bb.0:
587; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
588; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
589; GFX1132-NEXT:    s_add_u32 s8, s34, 44
590; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
591; GFX1132-NEXT:    s_getpc_b64 s[4:5]
592; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
593; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
594; GFX1132-NEXT:    s_mov_b32 s12, s13
595; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
596; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
597; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
598; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
599; GFX1132-NEXT:    s_mov_b32 s13, s14
600; GFX1132-NEXT:    s_mov_b32 s14, s15
601; GFX1132-NEXT:    s_mov_b32 s32, 0
602; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
603; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
604; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
605; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
606; GFX1132-NEXT:  .LBB1_1: ; %ComputeLoop
607; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
608; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
609; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
610; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v1
611; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
612; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
613; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
614; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
615; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
616; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
617; GFX1132-NEXT:    v_max_f32_e64 v2, s2, s2
618; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v2
619; GFX1132-NEXT:    s_cbranch_scc1 .LBB1_1
620; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
621; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
622; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
623; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
624; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
625; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
626; GFX1132-NEXT:    s_cbranch_execz .LBB1_4
627; GFX1132-NEXT:  ; %bb.3:
628; GFX1132-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
629; GFX1132-NEXT:    v_mov_b32_e32 v0, 0
630; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
631; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
632; GFX1132-NEXT:  .LBB1_4:
633; GFX1132-NEXT:    s_endpgm
634;
635; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
636; GFX7LESS-DPP:       ; %bb.0:
637; GFX7LESS-DPP-NEXT:    s_mov_b32 s32, 0
638; GFX7LESS-DPP-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
639; GFX7LESS-DPP-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
640; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, -1
641; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, 0xe8f000
642; GFX7LESS-DPP-NEXT:    s_add_u32 s40, s40, s11
643; GFX7LESS-DPP-NEXT:    s_addc_u32 s41, s41, 0
644; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s10
645; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s9
646; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s8
647; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
648; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
649; GFX7LESS-DPP-NEXT:    s_mov_b32 s39, 0xf000
650; GFX7LESS-DPP-NEXT:    s_mov_b32 s38, -1
651; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s4, 44
652; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s5, 0
653; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[4:5]
654; GFX7LESS-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
655; GFX7LESS-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
656; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
657; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
658; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
659; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
660; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
661; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
662; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
663; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[40:41]
664; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[42:43]
665; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
666; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
667; GFX7LESS-DPP-NEXT:    buffer_load_dword v1, off, s[36:39], 0
668; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], 0
669; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v2, 1.0, v0
670; GFX7LESS-DPP-NEXT:  .LBB1_1: ; %atomicrmw.start
671; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
672; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
673; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
674; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, v0, v2
675; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
676; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, v1
677; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v0
678; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
679; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
680; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
681; GFX7LESS-DPP-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
682; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v3
683; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[0:1]
684; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB1_1
685; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
686; GFX7LESS-DPP-NEXT:    s_endpgm
687;
688; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
689; GFX9-DPP:       ; %bb.0:
690; GFX9-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
691; GFX9-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
692; GFX9-DPP-NEXT:    s_mov_b32 s38, -1
693; GFX9-DPP-NEXT:    s_mov_b32 s39, 0xe00000
694; GFX9-DPP-NEXT:    s_add_u32 s36, s36, s11
695; GFX9-DPP-NEXT:    s_addc_u32 s37, s37, 0
696; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
697; GFX9-DPP-NEXT:    s_mov_b32 s12, s8
698; GFX9-DPP-NEXT:    s_add_u32 s8, s34, 44
699; GFX9-DPP-NEXT:    s_mov_b32 s13, s9
700; GFX9-DPP-NEXT:    s_addc_u32 s9, s35, 0
701; GFX9-DPP-NEXT:    s_getpc_b64 s[4:5]
702; GFX9-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
703; GFX9-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
704; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
705; GFX9-DPP-NEXT:    s_mov_b32 s14, s10
706; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
707; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
708; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
709; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
710; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
711; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
712; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
713; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
714; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
715; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
716; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
717; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
718; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
719; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
720; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
721; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v4, v3, v0, s[0:1]
722; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
723; GFX9-DPP-NEXT:    s_nop 1
724; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf
725; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
726; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
727; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
728; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
729; GFX9-DPP-NEXT:    s_nop 1
730; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf
731; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
732; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
733; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
734; GFX9-DPP-NEXT:    s_nop 1
735; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf
736; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
737; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
738; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
739; GFX9-DPP-NEXT:    s_nop 1
740; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf
741; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
742; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
743; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
744; GFX9-DPP-NEXT:    s_nop 1
745; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf
746; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
747; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
748; GFX9-DPP-NEXT:    s_nop 1
749; GFX9-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf
750; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
751; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
752; GFX9-DPP-NEXT:    v_readlane_b32 s4, v3, 63
753; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
754; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
755; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
756; GFX9-DPP-NEXT:    s_cbranch_execz .LBB1_3
757; GFX9-DPP-NEXT:  ; %bb.1:
758; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
759; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
760; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
761; GFX9-DPP-NEXT:    v_max_f32_e64 v6, s4, s4
762; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
763; GFX9-DPP-NEXT:    global_load_dword v1, v2, s[0:1]
764; GFX9-DPP-NEXT:  .LBB1_2: ; %atomicrmw.start
765; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
766; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
767; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
768; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v0, v6
769; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
770; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
771; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
772; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
773; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
774; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
775; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB1_2
776; GFX9-DPP-NEXT:  .LBB1_3:
777; GFX9-DPP-NEXT:    s_endpgm
778;
779; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
780; GFX1064-DPP:       ; %bb.0:
781; GFX1064-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
782; GFX1064-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
783; GFX1064-DPP-NEXT:    s_mov_b32 s38, -1
784; GFX1064-DPP-NEXT:    s_mov_b32 s39, 0x31e16000
785; GFX1064-DPP-NEXT:    s_add_u32 s36, s36, s11
786; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
787; GFX1064-DPP-NEXT:    s_addc_u32 s37, s37, 0
788; GFX1064-DPP-NEXT:    s_mov_b32 s12, s8
789; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
790; GFX1064-DPP-NEXT:    s_mov_b32 s13, s9
791; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
792; GFX1064-DPP-NEXT:    s_getpc_b64 s[4:5]
793; GFX1064-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
794; GFX1064-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
795; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
796; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
797; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
798; GFX1064-DPP-NEXT:    s_mov_b32 s14, s10
799; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
800; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
801; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
802; GFX1064-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
803; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
804; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
805; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
806; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
807; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
808; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
809; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
810; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1]
811; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
812; GFX1064-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
813; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
814; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
815; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
816; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
817; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
818; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
819; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
820; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
821; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
822; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
823; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
824; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
825; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
826; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
827; GFX1064-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
828; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
829; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
830; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v3, 32
831; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v3, 0
832; GFX1064-DPP-NEXT:    v_max_f32_e64 v3, s2, s2
833; GFX1064-DPP-NEXT:    v_max_f32_e64 v4, s3, s3
834; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
835; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
836; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
837; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
838; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
839; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
840; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, v3
841; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
842; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
843; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB1_2
844; GFX1064-DPP-NEXT:  ; %bb.1:
845; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
846; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
847; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
848; GFX1064-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
849; GFX1064-DPP-NEXT:  .LBB1_2:
850; GFX1064-DPP-NEXT:    s_endpgm
851;
852; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
853; GFX1032-DPP:       ; %bb.0:
854; GFX1032-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
855; GFX1032-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
856; GFX1032-DPP-NEXT:    s_mov_b32 s38, -1
857; GFX1032-DPP-NEXT:    s_mov_b32 s39, 0x31c16000
858; GFX1032-DPP-NEXT:    s_add_u32 s36, s36, s11
859; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
860; GFX1032-DPP-NEXT:    s_addc_u32 s37, s37, 0
861; GFX1032-DPP-NEXT:    s_mov_b32 s12, s8
862; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
863; GFX1032-DPP-NEXT:    s_mov_b32 s13, s9
864; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
865; GFX1032-DPP-NEXT:    s_getpc_b64 s[4:5]
866; GFX1032-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
867; GFX1032-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
868; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
869; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
870; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
871; GFX1032-DPP-NEXT:    s_mov_b32 s14, s10
872; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
873; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
874; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
875; GFX1032-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
876; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
877; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
878; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
879; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
880; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
881; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
882; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
883; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0
884; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
885; GFX1032-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
886; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
887; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
888; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
889; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
890; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
891; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
892; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
893; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
894; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
895; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
896; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
897; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
898; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
899; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
900; GFX1032-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
901; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
902; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
903; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
904; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
905; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, v3
906; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
907; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
908; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB1_2
909; GFX1032-DPP-NEXT:  ; %bb.1:
910; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
911; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
912; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
913; GFX1032-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
914; GFX1032-DPP-NEXT:  .LBB1_2:
915; GFX1032-DPP-NEXT:    s_endpgm
916;
917; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
918; GFX1164-DPP:       ; %bb.0:
919; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
920; GFX1164-DPP-NEXT:    s_mov_b32 s12, s8
921; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
922; GFX1164-DPP-NEXT:    s_mov_b32 s13, s9
923; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
924; GFX1164-DPP-NEXT:    s_getpc_b64 s[4:5]
925; GFX1164-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
926; GFX1164-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
927; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
928; GFX1164-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
929; GFX1164-DPP-NEXT:    s_mov_b32 s14, s10
930; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
931; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
932; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
933; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
934; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
935; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
936; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
937; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
938; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1]
939; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
940; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
941; GFX1164-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
942; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
943; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v1
944; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
945; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
946; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
947; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
948; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
949; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
950; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
951; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
952; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
953; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
954; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
955; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
956; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
957; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
958; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
959; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
960; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
961; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
962; GFX1164-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
963; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
964; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
965; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
966; GFX1164-DPP-NEXT:    v_permlane64_b32 v2, v1
967; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
968; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
969; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
970; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
971; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
972; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
973; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
974; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
975; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
976; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v4, exec_hi, v0
977; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
978; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, v1
979; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
980; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
981; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB1_2
982; GFX1164-DPP-NEXT:  ; %bb.1:
983; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
984; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
985; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
986; GFX1164-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
987; GFX1164-DPP-NEXT:  .LBB1_2:
988; GFX1164-DPP-NEXT:    s_endpgm
989;
990; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
991; GFX1132-DPP:       ; %bb.0:
992; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
993; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
994; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
995; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
996; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
997; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
998; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
999; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
1000; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
1001; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1002; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1003; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1004; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
1005; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
1006; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
1007; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1008; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1009; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
1010; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1011; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
1012; GFX1132-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1013; GFX1132-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1014; GFX1132-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
1015; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1016; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1
1017; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
1018; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1019; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
1020; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
1021; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1022; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1023; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
1024; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1025; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
1026; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1027; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1028; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
1029; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
1030; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1031; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1032; GFX1132-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
1033; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1034; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
1035; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1036; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
1037; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1038; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v4, exec_lo, 0
1039; GFX1132-DPP-NEXT:    v_mov_b32_e32 v0, v1
1040; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
1041; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1042; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
1043; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB1_2
1044; GFX1132-DPP-NEXT:  ; %bb.1:
1045; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
1046; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, 0
1047; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1048; GFX1132-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
1049; GFX1132-DPP-NEXT:  .LBB1_2:
1050; GFX1132-DPP-NEXT:    s_endpgm
1051  %divValue = call float @div.float.value()
1052  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
1053  ret void
1054}
1055
1056define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
1057; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1058; GFX7LESS:       ; %bb.0:
1059; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
1060; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
1061; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1062; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1063; GFX7LESS-NEXT:    s_cbranch_execz .LBB2_3
1064; GFX7LESS-NEXT:  ; %bb.1:
1065; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1066; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
1067; GFX7LESS-NEXT:    s_load_dword s2, s[0:1], 0x0
1068; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
1069; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
1070; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
1071; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s2
1072; GFX7LESS-NEXT:    s_mov_b32 s2, -1
1073; GFX7LESS-NEXT:  .LBB2_2: ; %atomicrmw.start
1074; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
1075; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
1076; GFX7LESS-NEXT:    v_max_f32_e32 v0, 4.0, v0
1077; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
1078; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v1
1079; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v0
1080; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
1081; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
1082; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
1083; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1084; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v2
1085; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1086; GFX7LESS-NEXT:    s_cbranch_execnz .LBB2_2
1087; GFX7LESS-NEXT:  .LBB2_3:
1088; GFX7LESS-NEXT:    s_endpgm
1089;
1090; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1091; GFX9:       ; %bb.0:
1092; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1093; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1094; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1095; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1096; GFX9-NEXT:    s_cbranch_execz .LBB2_3
1097; GFX9-NEXT:  ; %bb.1:
1098; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1099; GFX9-NEXT:    s_mov_b64 s[2:3], 0
1100; GFX9-NEXT:    v_mov_b32_e32 v2, 0
1101; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1102; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x0
1103; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1104; GFX9-NEXT:    v_mov_b32_e32 v1, s4
1105; GFX9-NEXT:  .LBB2_2: ; %atomicrmw.start
1106; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1107; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
1108; GFX9-NEXT:    v_max_f32_e32 v0, 4.0, v0
1109; GFX9-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
1110; GFX9-NEXT:    s_waitcnt vmcnt(0)
1111; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
1112; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
1113; GFX9-NEXT:    v_mov_b32_e32 v1, v0
1114; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
1115; GFX9-NEXT:    s_cbranch_execnz .LBB2_2
1116; GFX9-NEXT:  .LBB2_3:
1117; GFX9-NEXT:    s_endpgm
1118;
1119; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1120; GFX1064:       ; %bb.0:
1121; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1122; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1123; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1124; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1125; GFX1064-NEXT:    s_cbranch_execz .LBB2_2
1126; GFX1064-NEXT:  ; %bb.1:
1127; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1128; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
1129; GFX1064-NEXT:    v_mov_b32_e32 v1, 4.0
1130; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
1131; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1132; GFX1064-NEXT:  .LBB2_2:
1133; GFX1064-NEXT:    s_endpgm
1134;
1135; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1136; GFX1032:       ; %bb.0:
1137; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1138; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1139; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1140; GFX1032-NEXT:    s_cbranch_execz .LBB2_2
1141; GFX1032-NEXT:  ; %bb.1:
1142; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1143; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
1144; GFX1032-NEXT:    v_mov_b32_e32 v1, 4.0
1145; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
1146; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1147; GFX1032-NEXT:  .LBB2_2:
1148; GFX1032-NEXT:    s_endpgm
1149;
1150; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1151; GFX1164:       ; %bb.0:
1152; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1153; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
1154; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1155; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1156; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
1157; GFX1164-NEXT:    s_cbranch_execz .LBB2_2
1158; GFX1164-NEXT:  ; %bb.1:
1159; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1160; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
1161; GFX1164-NEXT:    v_mov_b32_e32 v1, 4.0
1162; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
1163; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1164; GFX1164-NEXT:  .LBB2_2:
1165; GFX1164-NEXT:    s_endpgm
1166;
1167; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1168; GFX1132:       ; %bb.0:
1169; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1170; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
1171; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1172; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
1173; GFX1132-NEXT:    s_cbranch_execz .LBB2_2
1174; GFX1132-NEXT:  ; %bb.1:
1175; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1176; GFX1132-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
1177; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
1178; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1179; GFX1132-NEXT:  .LBB2_2:
1180; GFX1132-NEXT:    s_endpgm
1181;
1182; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1183; GFX7LESS-DPP:       ; %bb.0:
1184; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
1185; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
1186; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1187; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1188; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB2_3
1189; GFX7LESS-DPP-NEXT:  ; %bb.1:
1190; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1191; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1192; GFX7LESS-DPP-NEXT:    s_load_dword s2, s[0:1], 0x0
1193; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
1194; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
1195; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1196; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s2
1197; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
1198; GFX7LESS-DPP-NEXT:  .LBB2_2: ; %atomicrmw.start
1199; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
1200; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
1201; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
1202; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
1203; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v1
1204; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v0
1205; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
1206; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
1207; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
1208; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1209; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v2
1210; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1211; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB2_2
1212; GFX7LESS-DPP-NEXT:  .LBB2_3:
1213; GFX7LESS-DPP-NEXT:    s_endpgm
1214;
1215; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1216; GFX9-DPP:       ; %bb.0:
1217; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1218; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1219; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1220; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1221; GFX9-DPP-NEXT:    s_cbranch_execz .LBB2_3
1222; GFX9-DPP-NEXT:  ; %bb.1:
1223; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1224; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
1225; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
1226; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1227; GFX9-DPP-NEXT:    s_load_dword s4, s[0:1], 0x0
1228; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1229; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s4
1230; GFX9-DPP-NEXT:  .LBB2_2: ; %atomicrmw.start
1231; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
1232; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
1233; GFX9-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
1234; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
1235; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
1236; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
1237; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
1238; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
1239; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
1240; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB2_2
1241; GFX9-DPP-NEXT:  .LBB2_3:
1242; GFX9-DPP-NEXT:    s_endpgm
1243;
1244; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1245; GFX1064-DPP:       ; %bb.0:
1246; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1247; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1248; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1249; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1250; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB2_2
1251; GFX1064-DPP-NEXT:  ; %bb.1:
1252; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1253; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
1254; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
1255; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1256; GFX1064-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1257; GFX1064-DPP-NEXT:  .LBB2_2:
1258; GFX1064-DPP-NEXT:    s_endpgm
1259;
1260; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1261; GFX1032-DPP:       ; %bb.0:
1262; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1263; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1264; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1265; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB2_2
1266; GFX1032-DPP-NEXT:  ; %bb.1:
1267; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1268; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
1269; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
1270; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1271; GFX1032-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1272; GFX1032-DPP-NEXT:  .LBB2_2:
1273; GFX1032-DPP-NEXT:    s_endpgm
1274;
1275; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1276; GFX1164-DPP:       ; %bb.0:
1277; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1278; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
1279; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1280; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1281; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
1282; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB2_2
1283; GFX1164-DPP-NEXT:  ; %bb.1:
1284; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1285; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
1286; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
1287; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1288; GFX1164-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1289; GFX1164-DPP-NEXT:  .LBB2_2:
1290; GFX1164-DPP-NEXT:    s_endpgm
1291;
1292; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
1293; GFX1132-DPP:       ; %bb.0:
1294; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1295; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
1296; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1297; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
1298; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB2_2
1299; GFX1132-DPP-NEXT:  ; %bb.1:
1300; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1301; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
1302; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1303; GFX1132-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1304; GFX1132-DPP-NEXT:  .LBB2_2:
1305; GFX1132-DPP-NEXT:    s_endpgm
1306  %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1
1307  ret void
1308}
1309
1310
1311define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
1312; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1313; GFX7LESS:       ; %bb.0:
1314; GFX7LESS-NEXT:    s_mov_b32 s32, 0
1315; GFX7LESS-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1316; GFX7LESS-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1317; GFX7LESS-NEXT:    s_mov_b32 s38, -1
1318; GFX7LESS-NEXT:    s_mov_b32 s39, 0xe8f000
1319; GFX7LESS-NEXT:    s_add_u32 s36, s36, s11
1320; GFX7LESS-NEXT:    s_addc_u32 s37, s37, 0
1321; GFX7LESS-NEXT:    s_mov_b32 s14, s10
1322; GFX7LESS-NEXT:    s_mov_b32 s13, s9
1323; GFX7LESS-NEXT:    s_mov_b32 s12, s8
1324; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[6:7]
1325; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[4:5]
1326; GFX7LESS-NEXT:    s_add_u32 s8, s34, 44
1327; GFX7LESS-NEXT:    s_addc_u32 s9, s35, 0
1328; GFX7LESS-NEXT:    s_getpc_b64 s[4:5]
1329; GFX7LESS-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1330; GFX7LESS-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1331; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1332; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1333; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1334; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
1335; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
1336; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
1337; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
1338; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
1339; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[38:39]
1340; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
1341; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1342; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
1343; GFX7LESS-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
1344; GFX7LESS-NEXT:  .LBB3_1: ; %ComputeLoop
1345; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
1346; GFX7LESS-NEXT:    s_ff1_i32_b64 s2, s[0:1]
1347; GFX7LESS-NEXT:    v_mul_f32_e32 v1, 1.0, v2
1348; GFX7LESS-NEXT:    v_readlane_b32 s4, v0, s2
1349; GFX7LESS-NEXT:    s_lshl_b64 s[2:3], 1, s2
1350; GFX7LESS-NEXT:    v_mul_f32_e64 v2, 1.0, s4
1351; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
1352; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
1353; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
1354; GFX7LESS-NEXT:    v_max_f32_e32 v2, v1, v2
1355; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB3_1
1356; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
1357; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
1358; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
1359; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1360; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1361; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
1362; GFX7LESS-NEXT:    s_cbranch_execz .LBB3_5
1363; GFX7LESS-NEXT:  ; %bb.3:
1364; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x9
1365; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
1366; GFX7LESS-NEXT:    s_mov_b32 s2, -1
1367; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
1368; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[0:3], 0
1369; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
1370; GFX7LESS-NEXT:    v_mul_f32_e32 v2, 1.0, v2
1371; GFX7LESS-NEXT:  .LBB3_4: ; %atomicrmw.start
1372; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
1373; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
1374; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
1375; GFX7LESS-NEXT:    v_max_f32_e32 v0, v0, v2
1376; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
1377; GFX7LESS-NEXT:    v_mov_b32_e32 v4, v1
1378; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v0
1379; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc
1380; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
1381; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
1382; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1383; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v3
1384; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1385; GFX7LESS-NEXT:    s_cbranch_execnz .LBB3_4
1386; GFX7LESS-NEXT:  .LBB3_5:
1387; GFX7LESS-NEXT:    s_endpgm
1388;
1389; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1390; GFX9:       ; %bb.0:
1391; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1392; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1393; GFX9-NEXT:    s_mov_b32 s38, -1
1394; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1395; GFX9-NEXT:    s_add_u32 s36, s36, s11
1396; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1397; GFX9-NEXT:    s_mov_b64 s[34:35], s[4:5]
1398; GFX9-NEXT:    s_mov_b32 s12, s8
1399; GFX9-NEXT:    s_add_u32 s8, s34, 44
1400; GFX9-NEXT:    s_mov_b32 s13, s9
1401; GFX9-NEXT:    s_addc_u32 s9, s35, 0
1402; GFX9-NEXT:    s_getpc_b64 s[4:5]
1403; GFX9-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1404; GFX9-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1405; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1406; GFX9-NEXT:    s_mov_b32 s14, s10
1407; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
1408; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1409; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1410; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
1411; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
1412; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1413; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
1414; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1415; GFX9-NEXT:    s_mov_b32 s32, 0
1416; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1417; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1418; GFX9-NEXT:    s_mov_b64 s[0:1], exec
1419; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
1420; GFX9-NEXT:  .LBB3_1: ; %ComputeLoop
1421; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1422; GFX9-NEXT:    s_ff1_i32_b64 s2, s[0:1]
1423; GFX9-NEXT:    v_readlane_b32 s4, v0, s2
1424; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s2
1425; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
1426; GFX9-NEXT:    v_max_f32_e64 v2, s4, s4
1427; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
1428; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
1429; GFX9-NEXT:    v_max_f32_e32 v2, v1, v2
1430; GFX9-NEXT:    s_cbranch_scc1 .LBB3_1
1431; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
1432; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1433; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1434; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1435; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1436; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
1437; GFX9-NEXT:    s_cbranch_execz .LBB3_5
1438; GFX9-NEXT:  ; %bb.3:
1439; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1440; GFX9-NEXT:    v_mov_b32_e32 v3, 0
1441; GFX9-NEXT:    s_mov_b64 s[2:3], 0
1442; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
1443; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1444; GFX9-NEXT:    global_load_dword v1, v3, s[0:1]
1445; GFX9-NEXT:  .LBB3_4: ; %atomicrmw.start
1446; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1447; GFX9-NEXT:    s_waitcnt vmcnt(0)
1448; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
1449; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
1450; GFX9-NEXT:    global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc
1451; GFX9-NEXT:    s_waitcnt vmcnt(0)
1452; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
1453; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
1454; GFX9-NEXT:    v_mov_b32_e32 v1, v0
1455; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
1456; GFX9-NEXT:    s_cbranch_execnz .LBB3_4
1457; GFX9-NEXT:  .LBB3_5:
1458; GFX9-NEXT:    s_endpgm
1459;
1460; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1461; GFX1064:       ; %bb.0:
1462; GFX1064-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1463; GFX1064-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1464; GFX1064-NEXT:    s_mov_b32 s38, -1
1465; GFX1064-NEXT:    s_mov_b32 s39, 0x31e16000
1466; GFX1064-NEXT:    s_add_u32 s36, s36, s11
1467; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
1468; GFX1064-NEXT:    s_addc_u32 s37, s37, 0
1469; GFX1064-NEXT:    s_mov_b32 s12, s8
1470; GFX1064-NEXT:    s_add_u32 s8, s34, 44
1471; GFX1064-NEXT:    s_mov_b32 s13, s9
1472; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
1473; GFX1064-NEXT:    s_getpc_b64 s[4:5]
1474; GFX1064-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1475; GFX1064-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1476; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1477; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1478; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1479; GFX1064-NEXT:    s_mov_b32 s14, s10
1480; GFX1064-NEXT:    s_mov_b64 s[10:11], s[6:7]
1481; GFX1064-NEXT:    s_mov_b64 s[4:5], s[0:1]
1482; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
1483; GFX1064-NEXT:    v_or3_b32 v31, v0, v1, v2
1484; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
1485; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
1486; GFX1064-NEXT:    s_mov_b32 s32, 0
1487; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
1488; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1489; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1490; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
1491; GFX1064-NEXT:  .LBB3_1: ; %ComputeLoop
1492; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
1493; GFX1064-NEXT:    s_ff1_i32_b64 s2, s[0:1]
1494; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v1
1495; GFX1064-NEXT:    v_readlane_b32 s3, v0, s2
1496; GFX1064-NEXT:    v_max_f32_e64 v2, s3, s3
1497; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s2
1498; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
1499; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
1500; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v2
1501; GFX1064-NEXT:    s_cbranch_scc1 .LBB3_1
1502; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
1503; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1504; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1505; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1506; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1507; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
1508; GFX1064-NEXT:    s_cbranch_execz .LBB3_4
1509; GFX1064-NEXT:  ; %bb.3:
1510; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1511; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
1512; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
1513; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1514; GFX1064-NEXT:  .LBB3_4:
1515; GFX1064-NEXT:    s_endpgm
1516;
1517; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1518; GFX1032:       ; %bb.0:
1519; GFX1032-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1520; GFX1032-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1521; GFX1032-NEXT:    s_mov_b32 s38, -1
1522; GFX1032-NEXT:    s_mov_b32 s39, 0x31c16000
1523; GFX1032-NEXT:    s_add_u32 s36, s36, s11
1524; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
1525; GFX1032-NEXT:    s_addc_u32 s37, s37, 0
1526; GFX1032-NEXT:    s_mov_b32 s12, s8
1527; GFX1032-NEXT:    s_add_u32 s8, s34, 44
1528; GFX1032-NEXT:    s_mov_b32 s13, s9
1529; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
1530; GFX1032-NEXT:    s_getpc_b64 s[4:5]
1531; GFX1032-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1532; GFX1032-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1533; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1534; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1535; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1536; GFX1032-NEXT:    s_mov_b32 s14, s10
1537; GFX1032-NEXT:    s_mov_b64 s[10:11], s[6:7]
1538; GFX1032-NEXT:    s_mov_b64 s[4:5], s[0:1]
1539; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
1540; GFX1032-NEXT:    v_or3_b32 v31, v0, v1, v2
1541; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
1542; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
1543; GFX1032-NEXT:    s_mov_b32 s32, 0
1544; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
1545; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1546; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1547; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
1548; GFX1032-NEXT:  .LBB3_1: ; %ComputeLoop
1549; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
1550; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
1551; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v1
1552; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
1553; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
1554; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
1555; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
1556; GFX1032-NEXT:    v_max_f32_e64 v2, s2, s2
1557; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v2
1558; GFX1032-NEXT:    s_cbranch_scc1 .LBB3_1
1559; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
1560; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1561; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1562; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1563; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
1564; GFX1032-NEXT:    s_cbranch_execz .LBB3_4
1565; GFX1032-NEXT:  ; %bb.3:
1566; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1567; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
1568; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
1569; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
1570; GFX1032-NEXT:  .LBB3_4:
1571; GFX1032-NEXT:    s_endpgm
1572;
1573; GFX1164-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1574; GFX1164:       ; %bb.0:
1575; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
1576; GFX1164-NEXT:    s_mov_b32 s12, s8
1577; GFX1164-NEXT:    s_add_u32 s8, s34, 44
1578; GFX1164-NEXT:    s_mov_b32 s13, s9
1579; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
1580; GFX1164-NEXT:    s_getpc_b64 s[4:5]
1581; GFX1164-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1582; GFX1164-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1583; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
1584; GFX1164-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
1585; GFX1164-NEXT:    s_mov_b32 s14, s10
1586; GFX1164-NEXT:    s_mov_b64 s[10:11], s[6:7]
1587; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
1588; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
1589; GFX1164-NEXT:    s_mov_b32 s32, 0
1590; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
1591; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1592; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1593; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
1594; GFX1164-NEXT:  .LBB3_1: ; %ComputeLoop
1595; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
1596; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1597; GFX1164-NEXT:    s_ctz_i32_b64 s2, s[0:1]
1598; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v1
1599; GFX1164-NEXT:    v_readlane_b32 s3, v0, s2
1600; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1601; GFX1164-NEXT:    v_max_f32_e64 v2, s3, s3
1602; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s2
1603; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
1604; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1605; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
1606; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v2
1607; GFX1164-NEXT:    s_cbranch_scc1 .LBB3_1
1608; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
1609; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1610; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
1611; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1612; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
1613; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
1614; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
1615; GFX1164-NEXT:    s_cbranch_execz .LBB3_4
1616; GFX1164-NEXT:  ; %bb.3:
1617; GFX1164-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
1618; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
1619; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
1620; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1621; GFX1164-NEXT:  .LBB3_4:
1622; GFX1164-NEXT:    s_endpgm
1623;
1624; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1625; GFX1132:       ; %bb.0:
1626; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
1627; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
1628; GFX1132-NEXT:    s_add_u32 s8, s34, 44
1629; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
1630; GFX1132-NEXT:    s_getpc_b64 s[4:5]
1631; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1632; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1633; GFX1132-NEXT:    s_mov_b32 s12, s13
1634; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
1635; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
1636; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
1637; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
1638; GFX1132-NEXT:    s_mov_b32 s13, s14
1639; GFX1132-NEXT:    s_mov_b32 s14, s15
1640; GFX1132-NEXT:    s_mov_b32 s32, 0
1641; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
1642; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1643; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1644; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
1645; GFX1132-NEXT:  .LBB3_1: ; %ComputeLoop
1646; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
1647; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1648; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
1649; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v1
1650; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
1651; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
1652; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1653; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
1654; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
1655; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1656; GFX1132-NEXT:    v_max_f32_e64 v2, s2, s2
1657; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v2
1658; GFX1132-NEXT:    s_cbranch_scc1 .LBB3_1
1659; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
1660; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1661; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
1662; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1663; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
1664; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
1665; GFX1132-NEXT:    s_cbranch_execz .LBB3_4
1666; GFX1132-NEXT:  ; %bb.3:
1667; GFX1132-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
1668; GFX1132-NEXT:    v_mov_b32_e32 v0, 0
1669; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
1670; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
1671; GFX1132-NEXT:  .LBB3_4:
1672; GFX1132-NEXT:    s_endpgm
1673;
1674; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1675; GFX7LESS-DPP:       ; %bb.0:
1676; GFX7LESS-DPP-NEXT:    s_mov_b32 s32, 0
1677; GFX7LESS-DPP-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
1678; GFX7LESS-DPP-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
1679; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, -1
1680; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, 0xe8f000
1681; GFX7LESS-DPP-NEXT:    s_add_u32 s40, s40, s11
1682; GFX7LESS-DPP-NEXT:    s_addc_u32 s41, s41, 0
1683; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s10
1684; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s9
1685; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s8
1686; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1687; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
1688; GFX7LESS-DPP-NEXT:    s_mov_b32 s39, 0xf000
1689; GFX7LESS-DPP-NEXT:    s_mov_b32 s38, -1
1690; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s4, 44
1691; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s5, 0
1692; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[4:5]
1693; GFX7LESS-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1694; GFX7LESS-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1695; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1696; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1697; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1698; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
1699; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
1700; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1701; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1702; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[40:41]
1703; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[42:43]
1704; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1705; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1706; GFX7LESS-DPP-NEXT:    buffer_load_dword v1, off, s[36:39], 0
1707; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], 0
1708; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v2, 1.0, v0
1709; GFX7LESS-DPP-NEXT:  .LBB3_1: ; %atomicrmw.start
1710; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
1711; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
1712; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
1713; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, v0, v2
1714; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
1715; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, v1
1716; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v0
1717; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
1718; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
1719; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
1720; GFX7LESS-DPP-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1721; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v3
1722; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1723; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB3_1
1724; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
1725; GFX7LESS-DPP-NEXT:    s_endpgm
1726;
1727; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1728; GFX9-DPP:       ; %bb.0:
1729; GFX9-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1730; GFX9-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1731; GFX9-DPP-NEXT:    s_mov_b32 s38, -1
1732; GFX9-DPP-NEXT:    s_mov_b32 s39, 0xe00000
1733; GFX9-DPP-NEXT:    s_add_u32 s36, s36, s11
1734; GFX9-DPP-NEXT:    s_addc_u32 s37, s37, 0
1735; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
1736; GFX9-DPP-NEXT:    s_mov_b32 s12, s8
1737; GFX9-DPP-NEXT:    s_add_u32 s8, s34, 44
1738; GFX9-DPP-NEXT:    s_mov_b32 s13, s9
1739; GFX9-DPP-NEXT:    s_addc_u32 s9, s35, 0
1740; GFX9-DPP-NEXT:    s_getpc_b64 s[4:5]
1741; GFX9-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1742; GFX9-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1743; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1744; GFX9-DPP-NEXT:    s_mov_b32 s14, s10
1745; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1746; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1747; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1748; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1749; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1750; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
1751; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
1752; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
1753; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
1754; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1755; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1756; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
1757; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
1758; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
1759; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1760; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v4, v3, v0, s[0:1]
1761; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1762; GFX9-DPP-NEXT:    s_nop 1
1763; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf
1764; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
1765; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
1766; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
1767; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1768; GFX9-DPP-NEXT:    s_nop 1
1769; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf
1770; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
1771; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
1772; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1773; GFX9-DPP-NEXT:    s_nop 1
1774; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf
1775; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
1776; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
1777; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1778; GFX9-DPP-NEXT:    s_nop 1
1779; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf
1780; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
1781; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
1782; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1783; GFX9-DPP-NEXT:    s_nop 1
1784; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf
1785; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
1786; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
1787; GFX9-DPP-NEXT:    s_nop 1
1788; GFX9-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf
1789; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
1790; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
1791; GFX9-DPP-NEXT:    v_readlane_b32 s4, v3, 63
1792; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
1793; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1794; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1795; GFX9-DPP-NEXT:    s_cbranch_execz .LBB3_3
1796; GFX9-DPP-NEXT:  ; %bb.1:
1797; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1798; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
1799; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
1800; GFX9-DPP-NEXT:    v_max_f32_e64 v6, s4, s4
1801; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1802; GFX9-DPP-NEXT:    global_load_dword v1, v2, s[0:1]
1803; GFX9-DPP-NEXT:  .LBB3_2: ; %atomicrmw.start
1804; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
1805; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
1806; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
1807; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v0, v6
1808; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
1809; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
1810; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
1811; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
1812; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
1813; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
1814; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB3_2
1815; GFX9-DPP-NEXT:  .LBB3_3:
1816; GFX9-DPP-NEXT:    s_endpgm
1817;
1818; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1819; GFX1064-DPP:       ; %bb.0:
1820; GFX1064-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1821; GFX1064-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1822; GFX1064-DPP-NEXT:    s_mov_b32 s38, -1
1823; GFX1064-DPP-NEXT:    s_mov_b32 s39, 0x31e16000
1824; GFX1064-DPP-NEXT:    s_add_u32 s36, s36, s11
1825; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
1826; GFX1064-DPP-NEXT:    s_addc_u32 s37, s37, 0
1827; GFX1064-DPP-NEXT:    s_mov_b32 s12, s8
1828; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
1829; GFX1064-DPP-NEXT:    s_mov_b32 s13, s9
1830; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
1831; GFX1064-DPP-NEXT:    s_getpc_b64 s[4:5]
1832; GFX1064-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1833; GFX1064-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1834; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1835; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1836; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1837; GFX1064-DPP-NEXT:    s_mov_b32 s14, s10
1838; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1839; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1840; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1841; GFX1064-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
1842; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
1843; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
1844; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
1845; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1846; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1847; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
1848; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1849; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1]
1850; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1851; GFX1064-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
1852; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
1853; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
1854; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
1855; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
1856; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1857; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1858; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1859; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
1860; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1861; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1862; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1863; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
1864; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1865; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1866; GFX1064-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
1867; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
1868; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1869; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v3, 32
1870; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v3, 0
1871; GFX1064-DPP-NEXT:    v_max_f32_e64 v3, s2, s2
1872; GFX1064-DPP-NEXT:    v_max_f32_e64 v4, s3, s3
1873; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
1874; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
1875; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
1876; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
1877; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
1878; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
1879; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, v3
1880; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1881; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1882; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB3_2
1883; GFX1064-DPP-NEXT:  ; %bb.1:
1884; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1885; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
1886; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1887; GFX1064-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
1888; GFX1064-DPP-NEXT:  .LBB3_2:
1889; GFX1064-DPP-NEXT:    s_endpgm
1890;
1891; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1892; GFX1032-DPP:       ; %bb.0:
1893; GFX1032-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1894; GFX1032-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1895; GFX1032-DPP-NEXT:    s_mov_b32 s38, -1
1896; GFX1032-DPP-NEXT:    s_mov_b32 s39, 0x31c16000
1897; GFX1032-DPP-NEXT:    s_add_u32 s36, s36, s11
1898; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
1899; GFX1032-DPP-NEXT:    s_addc_u32 s37, s37, 0
1900; GFX1032-DPP-NEXT:    s_mov_b32 s12, s8
1901; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
1902; GFX1032-DPP-NEXT:    s_mov_b32 s13, s9
1903; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
1904; GFX1032-DPP-NEXT:    s_getpc_b64 s[4:5]
1905; GFX1032-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1906; GFX1032-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1907; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
1908; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
1909; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
1910; GFX1032-DPP-NEXT:    s_mov_b32 s14, s10
1911; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1912; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1913; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1914; GFX1032-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
1915; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
1916; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
1917; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
1918; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1919; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1920; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
1921; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1922; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0
1923; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1924; GFX1032-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
1925; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
1926; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
1927; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
1928; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
1929; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1930; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1931; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1932; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
1933; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1934; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
1935; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1936; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
1937; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
1938; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1939; GFX1032-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
1940; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
1941; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
1942; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
1943; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
1944; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, v3
1945; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
1946; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1947; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB3_2
1948; GFX1032-DPP-NEXT:  ; %bb.1:
1949; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
1950; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
1951; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1952; GFX1032-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
1953; GFX1032-DPP-NEXT:  .LBB3_2:
1954; GFX1032-DPP-NEXT:    s_endpgm
1955;
1956; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
1957; GFX1164-DPP:       ; %bb.0:
1958; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
1959; GFX1164-DPP-NEXT:    s_mov_b32 s12, s8
1960; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
1961; GFX1164-DPP-NEXT:    s_mov_b32 s13, s9
1962; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
1963; GFX1164-DPP-NEXT:    s_getpc_b64 s[4:5]
1964; GFX1164-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
1965; GFX1164-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
1966; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
1967; GFX1164-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
1968; GFX1164-DPP-NEXT:    s_mov_b32 s14, s10
1969; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
1970; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
1971; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
1972; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
1973; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
1974; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1975; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
1976; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
1977; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1]
1978; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1979; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1980; GFX1164-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
1981; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
1982; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v1
1983; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1984; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
1985; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
1986; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1987; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
1988; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1989; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1990; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1991; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
1992; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
1993; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
1994; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1995; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
1996; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
1997; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1998; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
1999; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2000; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2001; GFX1164-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
2002; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
2003; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2004; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2005; GFX1164-DPP-NEXT:    v_permlane64_b32 v2, v1
2006; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2007; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
2008; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
2009; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2010; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
2011; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2012; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2013; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
2014; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
2015; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v4, exec_hi, v0
2016; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2017; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, v1
2018; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
2019; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
2020; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB3_2
2021; GFX1164-DPP-NEXT:  ; %bb.1:
2022; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
2023; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
2024; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2025; GFX1164-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
2026; GFX1164-DPP-NEXT:  .LBB3_2:
2027; GFX1164-DPP-NEXT:    s_endpgm
2028;
2029; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
2030; GFX1132-DPP:       ; %bb.0:
2031; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
2032; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
2033; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
2034; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
2035; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
2036; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2037; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2038; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
2039; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
2040; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
2041; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
2042; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
2043; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
2044; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
2045; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
2046; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2047; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2048; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
2049; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2050; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
2051; GFX1132-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
2052; GFX1132-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
2053; GFX1132-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
2054; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2055; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1
2056; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
2057; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2058; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
2059; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
2060; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2061; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2062; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
2063; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2064; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
2065; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2066; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2067; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
2068; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
2069; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2070; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2071; GFX1132-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
2072; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2073; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
2074; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
2075; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
2076; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2077; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v4, exec_lo, 0
2078; GFX1132-DPP-NEXT:    v_mov_b32_e32 v0, v1
2079; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
2080; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2081; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
2082; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB3_2
2083; GFX1132-DPP-NEXT:  ; %bb.1:
2084; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
2085; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, 0
2086; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2087; GFX1132-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
2088; GFX1132-DPP-NEXT:  .LBB3_2:
2089; GFX1132-DPP-NEXT:    s_endpgm
2090  %divValue = call float @div.float.value()
2091  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1
2092  ret void
2093}
2094
2095
; Test case: `atomicrmw fmax` on a uniform global address with a uniform
; (constant) operand and no syncscope qualifier (the "default_scope" in the
; kernel name).
;
; The kernel performs one monotonic `atomicrmw fmax` of the constant 4.0 on
; %ptr (addrspace(1)); the value returned by the atomic is never used.
;
; What the autogenerated expectations below show:
;  * Every configuration first restricts execution to the lane whose v_mbcnt
;    result is 0 (v_mbcnt_lo, plus v_mbcnt_hi everywhere except the wave32
;    variants GFX1032 and GFX1132; then a compare with 0 feeding
;    s_and_saveexec, or v_cmpx on the GFX11xx variants) and branches around
;    the atomic when no lane remains active.
;  * GFX7LESS and GFX9 expect a compare-and-swap retry loop (.LBB4_2,
;    %atomicrmw.start) built on buffer_atomic_cmpswap / global_atomic_cmpswap.
;  * The GFX10xx variants expect a single global_atomic_fmax and the GFX11xx
;    variants a single global_atomic_max_f32, with 4.0 materialized in v1.
;  * For every target, the -DPP expectations are the same instruction sequence
;    as the corresponding non-DPP expectations.
;
; The assertion lines are generated by utils/update_llc_test_checks.py (see the
; note on the first line of the file); regenerate them instead of editing them
; by hand.
; NOTE(review): attribute group #0 and metadata node !1 are defined outside
; this part of the file; their contents are not visible here.
2096define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
2097; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2098; GFX7LESS:       ; %bb.0:
2099; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
2100; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
2101; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2102; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2103; GFX7LESS-NEXT:    s_cbranch_execz .LBB4_3
2104; GFX7LESS-NEXT:  ; %bb.1:
2105; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2106; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
2107; GFX7LESS-NEXT:    s_load_dword s2, s[0:1], 0x0
2108; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
2109; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
2110; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
2111; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s2
2112; GFX7LESS-NEXT:    s_mov_b32 s2, -1
2113; GFX7LESS-NEXT:  .LBB4_2: ; %atomicrmw.start
2114; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
2115; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
2116; GFX7LESS-NEXT:    v_max_f32_e32 v0, 4.0, v0
2117; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
2118; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v1
2119; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v0
2120; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
2121; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
2122; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
2123; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2124; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v2
2125; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2126; GFX7LESS-NEXT:    s_cbranch_execnz .LBB4_2
2127; GFX7LESS-NEXT:  .LBB4_3:
2128; GFX7LESS-NEXT:    s_endpgm
2129;
2130; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2131; GFX9:       ; %bb.0:
2132; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2133; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2134; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2135; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2136; GFX9-NEXT:    s_cbranch_execz .LBB4_3
2137; GFX9-NEXT:  ; %bb.1:
2138; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2139; GFX9-NEXT:    s_mov_b64 s[2:3], 0
2140; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2141; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2142; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x0
2143; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2144; GFX9-NEXT:    v_mov_b32_e32 v1, s4
2145; GFX9-NEXT:  .LBB4_2: ; %atomicrmw.start
2146; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2147; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
2148; GFX9-NEXT:    v_max_f32_e32 v0, 4.0, v0
2149; GFX9-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
2150; GFX9-NEXT:    s_waitcnt vmcnt(0)
2151; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
2152; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
2153; GFX9-NEXT:    v_mov_b32_e32 v1, v0
2154; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
2155; GFX9-NEXT:    s_cbranch_execnz .LBB4_2
2156; GFX9-NEXT:  .LBB4_3:
2157; GFX9-NEXT:    s_endpgm
2158;
2159; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2160; GFX1064:       ; %bb.0:
2161; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2162; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2163; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2164; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2165; GFX1064-NEXT:    s_cbranch_execz .LBB4_2
2166; GFX1064-NEXT:  ; %bb.1:
2167; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2168; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
2169; GFX1064-NEXT:    v_mov_b32_e32 v1, 4.0
2170; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
2171; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2172; GFX1064-NEXT:  .LBB4_2:
2173; GFX1064-NEXT:    s_endpgm
2174;
2175; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2176; GFX1032:       ; %bb.0:
2177; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2178; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
2179; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
2180; GFX1032-NEXT:    s_cbranch_execz .LBB4_2
2181; GFX1032-NEXT:  ; %bb.1:
2182; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2183; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
2184; GFX1032-NEXT:    v_mov_b32_e32 v1, 4.0
2185; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
2186; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2187; GFX1032-NEXT:  .LBB4_2:
2188; GFX1032-NEXT:    s_endpgm
2189;
2190; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2191; GFX1164:       ; %bb.0:
2192; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2193; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
2194; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2195; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2196; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
2197; GFX1164-NEXT:    s_cbranch_execz .LBB4_2
2198; GFX1164-NEXT:  ; %bb.1:
2199; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2200; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
2201; GFX1164-NEXT:    v_mov_b32_e32 v1, 4.0
2202; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
2203; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2204; GFX1164-NEXT:  .LBB4_2:
2205; GFX1164-NEXT:    s_endpgm
2206;
2207; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2208; GFX1132:       ; %bb.0:
2209; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2210; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
2211; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2212; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
2213; GFX1132-NEXT:    s_cbranch_execz .LBB4_2
2214; GFX1132-NEXT:  ; %bb.1:
2215; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2216; GFX1132-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
2217; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
2218; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2219; GFX1132-NEXT:  .LBB4_2:
2220; GFX1132-NEXT:    s_endpgm
2221;
2222; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2223; GFX7LESS-DPP:       ; %bb.0:
2224; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
2225; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
2226; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2227; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2228; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB4_3
2229; GFX7LESS-DPP-NEXT:  ; %bb.1:
2230; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2231; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2232; GFX7LESS-DPP-NEXT:    s_load_dword s2, s[0:1], 0x0
2233; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
2234; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
2235; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2236; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s2
2237; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
2238; GFX7LESS-DPP-NEXT:  .LBB4_2: ; %atomicrmw.start
2239; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
2240; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
2241; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
2242; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
2243; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v1
2244; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v0
2245; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
2246; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
2247; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
2248; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2249; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v2
2250; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2251; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB4_2
2252; GFX7LESS-DPP-NEXT:  .LBB4_3:
2253; GFX7LESS-DPP-NEXT:    s_endpgm
2254;
2255; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2256; GFX9-DPP:       ; %bb.0:
2257; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2258; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2259; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2260; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2261; GFX9-DPP-NEXT:    s_cbranch_execz .LBB4_3
2262; GFX9-DPP-NEXT:  ; %bb.1:
2263; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2264; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
2265; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
2266; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2267; GFX9-DPP-NEXT:    s_load_dword s4, s[0:1], 0x0
2268; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2269; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s4
2270; GFX9-DPP-NEXT:  .LBB4_2: ; %atomicrmw.start
2271; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
2272; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
2273; GFX9-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
2274; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
2275; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
2276; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
2277; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
2278; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
2279; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
2280; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB4_2
2281; GFX9-DPP-NEXT:  .LBB4_3:
2282; GFX9-DPP-NEXT:    s_endpgm
2283;
2284; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2285; GFX1064-DPP:       ; %bb.0:
2286; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2287; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2288; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2289; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2290; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB4_2
2291; GFX1064-DPP-NEXT:  ; %bb.1:
2292; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2293; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
2294; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
2295; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2296; GFX1064-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2297; GFX1064-DPP-NEXT:  .LBB4_2:
2298; GFX1064-DPP-NEXT:    s_endpgm
2299;
2300; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2301; GFX1032-DPP:       ; %bb.0:
2302; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2303; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
2304; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
2305; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB4_2
2306; GFX1032-DPP-NEXT:  ; %bb.1:
2307; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2308; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
2309; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
2310; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2311; GFX1032-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2312; GFX1032-DPP-NEXT:  .LBB4_2:
2313; GFX1032-DPP-NEXT:    s_endpgm
2314;
2315; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2316; GFX1164-DPP:       ; %bb.0:
2317; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2318; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
2319; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2320; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2321; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
2322; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB4_2
2323; GFX1164-DPP-NEXT:  ; %bb.1:
2324; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2325; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
2326; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
2327; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2328; GFX1164-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2329; GFX1164-DPP-NEXT:  .LBB4_2:
2330; GFX1164-DPP-NEXT:    s_endpgm
2331;
2332; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe:
2333; GFX1132-DPP:       ; %bb.0:
2334; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2335; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
2336; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2337; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
2338; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB4_2
2339; GFX1132-DPP-NEXT:  ; %bb.1:
2340; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2341; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
2342; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2343; GFX1132-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2344; GFX1132-DPP-NEXT:  .LBB4_2:
2345; GFX1132-DPP-NEXT:    s_endpgm
; IR under test: the operand is the immediate 4.0 (identical for every lane),
; ordering is monotonic, alignment is 4, and %result has no users.
2346  %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1
2347  ret void
2348}
2349
2350define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
2351; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2352; GFX7LESS:       ; %bb.0:
2353; GFX7LESS-NEXT:    s_mov_b32 s32, 0
2354; GFX7LESS-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2355; GFX7LESS-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2356; GFX7LESS-NEXT:    s_mov_b32 s38, -1
2357; GFX7LESS-NEXT:    s_mov_b32 s39, 0xe8f000
2358; GFX7LESS-NEXT:    s_add_u32 s36, s36, s11
2359; GFX7LESS-NEXT:    s_addc_u32 s37, s37, 0
2360; GFX7LESS-NEXT:    s_mov_b32 s14, s10
2361; GFX7LESS-NEXT:    s_mov_b32 s13, s9
2362; GFX7LESS-NEXT:    s_mov_b32 s12, s8
2363; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[6:7]
2364; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[4:5]
2365; GFX7LESS-NEXT:    s_add_u32 s8, s34, 44
2366; GFX7LESS-NEXT:    s_addc_u32 s9, s35, 0
2367; GFX7LESS-NEXT:    s_getpc_b64 s[4:5]
2368; GFX7LESS-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2369; GFX7LESS-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2370; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2371; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2372; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2373; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
2374; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
2375; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
2376; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
2377; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
2378; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[38:39]
2379; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
2380; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2381; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
2382; GFX7LESS-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
2383; GFX7LESS-NEXT:  .LBB5_1: ; %ComputeLoop
2384; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
2385; GFX7LESS-NEXT:    s_ff1_i32_b64 s2, s[0:1]
2386; GFX7LESS-NEXT:    v_mul_f32_e32 v1, 1.0, v2
2387; GFX7LESS-NEXT:    v_readlane_b32 s4, v0, s2
2388; GFX7LESS-NEXT:    s_lshl_b64 s[2:3], 1, s2
2389; GFX7LESS-NEXT:    v_mul_f32_e64 v2, 1.0, s4
2390; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
2391; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
2392; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
2393; GFX7LESS-NEXT:    v_max_f32_e32 v2, v1, v2
2394; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB5_1
2395; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
2396; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
2397; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
2398; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2399; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2400; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
2401; GFX7LESS-NEXT:    s_cbranch_execz .LBB5_5
2402; GFX7LESS-NEXT:  ; %bb.3:
2403; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x9
2404; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
2405; GFX7LESS-NEXT:    s_mov_b32 s2, -1
2406; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
2407; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[0:3], 0
2408; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
2409; GFX7LESS-NEXT:    v_mul_f32_e32 v2, 1.0, v2
2410; GFX7LESS-NEXT:  .LBB5_4: ; %atomicrmw.start
2411; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
2412; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
2413; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
2414; GFX7LESS-NEXT:    v_max_f32_e32 v0, v0, v2
2415; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
2416; GFX7LESS-NEXT:    v_mov_b32_e32 v4, v1
2417; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v0
2418; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc
2419; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
2420; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
2421; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2422; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v3
2423; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2424; GFX7LESS-NEXT:    s_cbranch_execnz .LBB5_4
2425; GFX7LESS-NEXT:  .LBB5_5:
2426; GFX7LESS-NEXT:    s_endpgm
2427;
2428; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2429; GFX9:       ; %bb.0:
2430; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2431; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2432; GFX9-NEXT:    s_mov_b32 s38, -1
2433; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2434; GFX9-NEXT:    s_add_u32 s36, s36, s11
2435; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2436; GFX9-NEXT:    s_mov_b64 s[34:35], s[4:5]
2437; GFX9-NEXT:    s_mov_b32 s12, s8
2438; GFX9-NEXT:    s_add_u32 s8, s34, 44
2439; GFX9-NEXT:    s_mov_b32 s13, s9
2440; GFX9-NEXT:    s_addc_u32 s9, s35, 0
2441; GFX9-NEXT:    s_getpc_b64 s[4:5]
2442; GFX9-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2443; GFX9-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2444; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2445; GFX9-NEXT:    s_mov_b32 s14, s10
2446; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
2447; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2448; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2449; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
2450; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
2451; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2452; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
2453; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2454; GFX9-NEXT:    s_mov_b32 s32, 0
2455; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2456; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2457; GFX9-NEXT:    s_mov_b64 s[0:1], exec
2458; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
2459; GFX9-NEXT:  .LBB5_1: ; %ComputeLoop
2460; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2461; GFX9-NEXT:    s_ff1_i32_b64 s2, s[0:1]
2462; GFX9-NEXT:    v_readlane_b32 s4, v0, s2
2463; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s2
2464; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
2465; GFX9-NEXT:    v_max_f32_e64 v2, s4, s4
2466; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
2467; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
2468; GFX9-NEXT:    v_max_f32_e32 v2, v1, v2
2469; GFX9-NEXT:    s_cbranch_scc1 .LBB5_1
2470; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
2471; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2472; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2473; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2474; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2475; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
2476; GFX9-NEXT:    s_cbranch_execz .LBB5_5
2477; GFX9-NEXT:  ; %bb.3:
2478; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2479; GFX9-NEXT:    v_mov_b32_e32 v3, 0
2480; GFX9-NEXT:    s_mov_b64 s[2:3], 0
2481; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
2482; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2483; GFX9-NEXT:    global_load_dword v1, v3, s[0:1]
2484; GFX9-NEXT:  .LBB5_4: ; %atomicrmw.start
2485; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2486; GFX9-NEXT:    s_waitcnt vmcnt(0)
2487; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
2488; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
2489; GFX9-NEXT:    global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc
2490; GFX9-NEXT:    s_waitcnt vmcnt(0)
2491; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
2492; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
2493; GFX9-NEXT:    v_mov_b32_e32 v1, v0
2494; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
2495; GFX9-NEXT:    s_cbranch_execnz .LBB5_4
2496; GFX9-NEXT:  .LBB5_5:
2497; GFX9-NEXT:    s_endpgm
2498;
2499; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2500; GFX1064:       ; %bb.0:
2501; GFX1064-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2502; GFX1064-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2503; GFX1064-NEXT:    s_mov_b32 s38, -1
2504; GFX1064-NEXT:    s_mov_b32 s39, 0x31e16000
2505; GFX1064-NEXT:    s_add_u32 s36, s36, s11
2506; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
2507; GFX1064-NEXT:    s_addc_u32 s37, s37, 0
2508; GFX1064-NEXT:    s_mov_b32 s12, s8
2509; GFX1064-NEXT:    s_add_u32 s8, s34, 44
2510; GFX1064-NEXT:    s_mov_b32 s13, s9
2511; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
2512; GFX1064-NEXT:    s_getpc_b64 s[4:5]
2513; GFX1064-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2514; GFX1064-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2515; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2516; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2517; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2518; GFX1064-NEXT:    s_mov_b32 s14, s10
2519; GFX1064-NEXT:    s_mov_b64 s[10:11], s[6:7]
2520; GFX1064-NEXT:    s_mov_b64 s[4:5], s[0:1]
2521; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
2522; GFX1064-NEXT:    v_or3_b32 v31, v0, v1, v2
2523; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
2524; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
2525; GFX1064-NEXT:    s_mov_b32 s32, 0
2526; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
2527; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2528; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
2529; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
2530; GFX1064-NEXT:  .LBB5_1: ; %ComputeLoop
2531; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
2532; GFX1064-NEXT:    s_ff1_i32_b64 s2, s[0:1]
2533; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v1
2534; GFX1064-NEXT:    v_readlane_b32 s3, v0, s2
2535; GFX1064-NEXT:    v_max_f32_e64 v2, s3, s3
2536; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s2
2537; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
2538; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
2539; GFX1064-NEXT:    v_max_f32_e32 v1, v1, v2
2540; GFX1064-NEXT:    s_cbranch_scc1 .LBB5_1
2541; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
2542; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2543; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2544; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2545; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2546; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
2547; GFX1064-NEXT:    s_cbranch_execz .LBB5_4
2548; GFX1064-NEXT:  ; %bb.3:
2549; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2550; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
2551; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
2552; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2553; GFX1064-NEXT:  .LBB5_4:
2554; GFX1064-NEXT:    s_endpgm
2555;
2556; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2557; GFX1032:       ; %bb.0:
2558; GFX1032-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2559; GFX1032-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2560; GFX1032-NEXT:    s_mov_b32 s38, -1
2561; GFX1032-NEXT:    s_mov_b32 s39, 0x31c16000
2562; GFX1032-NEXT:    s_add_u32 s36, s36, s11
2563; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
2564; GFX1032-NEXT:    s_addc_u32 s37, s37, 0
2565; GFX1032-NEXT:    s_mov_b32 s12, s8
2566; GFX1032-NEXT:    s_add_u32 s8, s34, 44
2567; GFX1032-NEXT:    s_mov_b32 s13, s9
2568; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
2569; GFX1032-NEXT:    s_getpc_b64 s[4:5]
2570; GFX1032-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2571; GFX1032-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2572; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2573; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2574; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2575; GFX1032-NEXT:    s_mov_b32 s14, s10
2576; GFX1032-NEXT:    s_mov_b64 s[10:11], s[6:7]
2577; GFX1032-NEXT:    s_mov_b64 s[4:5], s[0:1]
2578; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
2579; GFX1032-NEXT:    v_or3_b32 v31, v0, v1, v2
2580; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
2581; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
2582; GFX1032-NEXT:    s_mov_b32 s32, 0
2583; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
2584; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2585; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
2586; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
2587; GFX1032-NEXT:  .LBB5_1: ; %ComputeLoop
2588; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
2589; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
2590; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v1
2591; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
2592; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
2593; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
2594; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
2595; GFX1032-NEXT:    v_max_f32_e64 v2, s2, s2
2596; GFX1032-NEXT:    v_max_f32_e32 v1, v1, v2
2597; GFX1032-NEXT:    s_cbranch_scc1 .LBB5_1
2598; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
2599; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2600; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
2601; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
2602; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
2603; GFX1032-NEXT:    s_cbranch_execz .LBB5_4
2604; GFX1032-NEXT:  ; %bb.3:
2605; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2606; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
2607; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
2608; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
2609; GFX1032-NEXT:  .LBB5_4:
2610; GFX1032-NEXT:    s_endpgm
2611;
2612; GFX1164-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2613; GFX1164:       ; %bb.0:
2614; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
2615; GFX1164-NEXT:    s_mov_b32 s12, s8
2616; GFX1164-NEXT:    s_add_u32 s8, s34, 44
2617; GFX1164-NEXT:    s_mov_b32 s13, s9
2618; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
2619; GFX1164-NEXT:    s_getpc_b64 s[4:5]
2620; GFX1164-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2621; GFX1164-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2622; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
2623; GFX1164-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
2624; GFX1164-NEXT:    s_mov_b32 s14, s10
2625; GFX1164-NEXT:    s_mov_b64 s[10:11], s[6:7]
2626; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
2627; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
2628; GFX1164-NEXT:    s_mov_b32 s32, 0
2629; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
2630; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2631; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
2632; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
2633; GFX1164-NEXT:  .LBB5_1: ; %ComputeLoop
2634; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
2635; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2636; GFX1164-NEXT:    s_ctz_i32_b64 s2, s[0:1]
2637; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v1
2638; GFX1164-NEXT:    v_readlane_b32 s3, v0, s2
2639; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2640; GFX1164-NEXT:    v_max_f32_e64 v2, s3, s3
2641; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s2
2642; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
2643; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2644; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
2645; GFX1164-NEXT:    v_max_f32_e32 v1, v1, v2
2646; GFX1164-NEXT:    s_cbranch_scc1 .LBB5_1
2647; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
2648; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2649; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
2650; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2651; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2652; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
2653; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
2654; GFX1164-NEXT:    s_cbranch_execz .LBB5_4
2655; GFX1164-NEXT:  ; %bb.3:
2656; GFX1164-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
2657; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
2658; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
2659; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2660; GFX1164-NEXT:  .LBB5_4:
2661; GFX1164-NEXT:    s_endpgm
2662;
2663; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2664; GFX1132:       ; %bb.0:
2665; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
2666; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
2667; GFX1132-NEXT:    s_add_u32 s8, s34, 44
2668; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
2669; GFX1132-NEXT:    s_getpc_b64 s[4:5]
2670; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2671; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2672; GFX1132-NEXT:    s_mov_b32 s12, s13
2673; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
2674; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
2675; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
2676; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
2677; GFX1132-NEXT:    s_mov_b32 s13, s14
2678; GFX1132-NEXT:    s_mov_b32 s14, s15
2679; GFX1132-NEXT:    s_mov_b32 s32, 0
2680; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
2681; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2682; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
2683; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
2684; GFX1132-NEXT:  .LBB5_1: ; %ComputeLoop
2685; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
2686; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2687; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
2688; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v1
2689; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
2690; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
2691; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2692; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
2693; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
2694; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2695; GFX1132-NEXT:    v_max_f32_e64 v2, s2, s2
2696; GFX1132-NEXT:    v_max_f32_e32 v1, v1, v2
2697; GFX1132-NEXT:    s_cbranch_scc1 .LBB5_1
2698; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
2699; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2700; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
2701; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2702; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
2703; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
2704; GFX1132-NEXT:    s_cbranch_execz .LBB5_4
2705; GFX1132-NEXT:  ; %bb.3:
2706; GFX1132-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
2707; GFX1132-NEXT:    v_mov_b32_e32 v0, 0
2708; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
2709; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
2710; GFX1132-NEXT:  .LBB5_4:
2711; GFX1132-NEXT:    s_endpgm
2712;
2713; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2714; GFX7LESS-DPP:       ; %bb.0:
2715; GFX7LESS-DPP-NEXT:    s_mov_b32 s32, 0
2716; GFX7LESS-DPP-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
2717; GFX7LESS-DPP-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
2718; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, -1
2719; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, 0xe8f000
2720; GFX7LESS-DPP-NEXT:    s_add_u32 s40, s40, s11
2721; GFX7LESS-DPP-NEXT:    s_addc_u32 s41, s41, 0
2722; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s10
2723; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s9
2724; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s8
2725; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
2726; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
2727; GFX7LESS-DPP-NEXT:    s_mov_b32 s39, 0xf000
2728; GFX7LESS-DPP-NEXT:    s_mov_b32 s38, -1
2729; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s4, 44
2730; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s5, 0
2731; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[4:5]
2732; GFX7LESS-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2733; GFX7LESS-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2734; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2735; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2736; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2737; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
2738; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
2739; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
2740; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
2741; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[40:41]
2742; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[42:43]
2743; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2744; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2745; GFX7LESS-DPP-NEXT:    buffer_load_dword v1, off, s[36:39], 0
2746; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], 0
2747; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v2, 1.0, v0
2748; GFX7LESS-DPP-NEXT:  .LBB5_1: ; %atomicrmw.start
2749; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
2750; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
2751; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
2752; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, v0, v2
2753; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
2754; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, v1
2755; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v0
2756; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
2757; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
2758; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v1
2759; GFX7LESS-DPP-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
2760; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v3
2761; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[0:1]
2762; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB5_1
2763; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
2764; GFX7LESS-DPP-NEXT:    s_endpgm
2765;
2766; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2767; GFX9-DPP:       ; %bb.0:
2768; GFX9-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2769; GFX9-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2770; GFX9-DPP-NEXT:    s_mov_b32 s38, -1
2771; GFX9-DPP-NEXT:    s_mov_b32 s39, 0xe00000
2772; GFX9-DPP-NEXT:    s_add_u32 s36, s36, s11
2773; GFX9-DPP-NEXT:    s_addc_u32 s37, s37, 0
2774; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
2775; GFX9-DPP-NEXT:    s_mov_b32 s12, s8
2776; GFX9-DPP-NEXT:    s_add_u32 s8, s34, 44
2777; GFX9-DPP-NEXT:    s_mov_b32 s13, s9
2778; GFX9-DPP-NEXT:    s_addc_u32 s9, s35, 0
2779; GFX9-DPP-NEXT:    s_getpc_b64 s[4:5]
2780; GFX9-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2781; GFX9-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2782; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2783; GFX9-DPP-NEXT:    s_mov_b32 s14, s10
2784; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
2785; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2786; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2787; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
2788; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
2789; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
2790; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
2791; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
2792; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
2793; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2794; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2795; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2796; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
2797; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
2798; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
2799; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v4, v3, v0, s[0:1]
2800; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2801; GFX9-DPP-NEXT:    s_nop 1
2802; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf
2803; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
2804; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
2805; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
2806; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2807; GFX9-DPP-NEXT:    s_nop 1
2808; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf
2809; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
2810; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
2811; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2812; GFX9-DPP-NEXT:    s_nop 1
2813; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf
2814; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
2815; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
2816; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2817; GFX9-DPP-NEXT:    s_nop 1
2818; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf
2819; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
2820; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
2821; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2822; GFX9-DPP-NEXT:    s_nop 1
2823; GFX9-DPP-NEXT:    v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf
2824; GFX9-DPP-NEXT:    v_max_f32_e32 v5, v5, v5
2825; GFX9-DPP-NEXT:    v_max_f32_e32 v4, v4, v5
2826; GFX9-DPP-NEXT:    s_nop 1
2827; GFX9-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf
2828; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
2829; GFX9-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
2830; GFX9-DPP-NEXT:    v_readlane_b32 s4, v3, 63
2831; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
2832; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2833; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2834; GFX9-DPP-NEXT:    s_cbranch_execz .LBB5_3
2835; GFX9-DPP-NEXT:  ; %bb.1:
2836; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2837; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
2838; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
2839; GFX9-DPP-NEXT:    v_max_f32_e64 v6, s4, s4
2840; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2841; GFX9-DPP-NEXT:    global_load_dword v1, v2, s[0:1]
2842; GFX9-DPP-NEXT:  .LBB5_2: ; %atomicrmw.start
2843; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
2844; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
2845; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
2846; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v0, v6
2847; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
2848; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
2849; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
2850; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
2851; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
2852; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
2853; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB5_2
2854; GFX9-DPP-NEXT:  .LBB5_3:
2855; GFX9-DPP-NEXT:    s_endpgm
2856;
2857; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2858; GFX1064-DPP:       ; %bb.0:
2859; GFX1064-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2860; GFX1064-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2861; GFX1064-DPP-NEXT:    s_mov_b32 s38, -1
2862; GFX1064-DPP-NEXT:    s_mov_b32 s39, 0x31e16000
2863; GFX1064-DPP-NEXT:    s_add_u32 s36, s36, s11
2864; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
2865; GFX1064-DPP-NEXT:    s_addc_u32 s37, s37, 0
2866; GFX1064-DPP-NEXT:    s_mov_b32 s12, s8
2867; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
2868; GFX1064-DPP-NEXT:    s_mov_b32 s13, s9
2869; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
2870; GFX1064-DPP-NEXT:    s_getpc_b64 s[4:5]
2871; GFX1064-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2872; GFX1064-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2873; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2874; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2875; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2876; GFX1064-DPP-NEXT:    s_mov_b32 s14, s10
2877; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
2878; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
2879; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
2880; GFX1064-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
2881; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
2882; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
2883; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
2884; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2885; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2886; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
2887; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
2888; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1]
2889; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2890; GFX1064-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
2891; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
2892; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
2893; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
2894; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
2895; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2896; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2897; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2898; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
2899; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2900; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2901; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2902; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
2903; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2904; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2905; GFX1064-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
2906; GFX1064-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
2907; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2908; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v3, 32
2909; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v3, 0
2910; GFX1064-DPP-NEXT:    v_max_f32_e64 v3, s2, s2
2911; GFX1064-DPP-NEXT:    v_max_f32_e64 v4, s3, s3
2912; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
2913; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2914; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
2915; GFX1064-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
2916; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
2917; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
2918; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, v3
2919; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2920; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
2921; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB5_2
2922; GFX1064-DPP-NEXT:  ; %bb.1:
2923; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2924; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
2925; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2926; GFX1064-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
2927; GFX1064-DPP-NEXT:  .LBB5_2:
2928; GFX1064-DPP-NEXT:    s_endpgm
2929;
2930; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2931; GFX1032-DPP:       ; %bb.0:
2932; GFX1032-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2933; GFX1032-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2934; GFX1032-DPP-NEXT:    s_mov_b32 s38, -1
2935; GFX1032-DPP-NEXT:    s_mov_b32 s39, 0x31c16000
2936; GFX1032-DPP-NEXT:    s_add_u32 s36, s36, s11
2937; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
2938; GFX1032-DPP-NEXT:    s_addc_u32 s37, s37, 0
2939; GFX1032-DPP-NEXT:    s_mov_b32 s12, s8
2940; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
2941; GFX1032-DPP-NEXT:    s_mov_b32 s13, s9
2942; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
2943; GFX1032-DPP-NEXT:    s_getpc_b64 s[4:5]
2944; GFX1032-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
2945; GFX1032-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
2946; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
2947; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
2948; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
2949; GFX1032-DPP-NEXT:    s_mov_b32 s14, s10
2950; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
2951; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
2952; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
2953; GFX1032-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
2954; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
2955; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
2956; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
2957; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2958; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2959; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
2960; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
2961; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0
2962; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2963; GFX1032-DPP-NEXT:    v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
2964; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
2965; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v3
2966; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v4, v3
2967; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
2968; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2969; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2970; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2971; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
2972; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2973; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
2974; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2975; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
2976; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v5, v5
2977; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2978; GFX1032-DPP-NEXT:    v_permlanex16_b32 v4, v3, 0, 0
2979; GFX1032-DPP-NEXT:    v_max_f32_e32 v4, v4, v4
2980; GFX1032-DPP-NEXT:    v_max_f32_e32 v3, v3, v4
2981; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
2982; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2983; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, v3
2984; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
2985; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
2986; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB5_2
2987; GFX1032-DPP-NEXT:  ; %bb.1:
2988; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
2989; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
2990; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
2991; GFX1032-DPP-NEXT:    global_atomic_fmax v1, v0, s[0:1]
2992; GFX1032-DPP-NEXT:  .LBB5_2:
2993; GFX1032-DPP-NEXT:    s_endpgm
2994;
2995; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
2996; GFX1164-DPP:       ; %bb.0:
2997; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
2998; GFX1164-DPP-NEXT:    s_mov_b32 s12, s8
2999; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
3000; GFX1164-DPP-NEXT:    s_mov_b32 s13, s9
3001; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
3002; GFX1164-DPP-NEXT:    s_getpc_b64 s[4:5]
3003; GFX1164-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
3004; GFX1164-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
3005; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
3006; GFX1164-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
3007; GFX1164-DPP-NEXT:    s_mov_b32 s14, s10
3008; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
3009; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
3010; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
3011; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
3012; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3013; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3014; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
3015; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
3016; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1]
3017; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3018; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3019; GFX1164-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
3020; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
3021; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v1
3022; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3023; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
3024; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
3025; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3026; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
3027; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3028; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3029; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3030; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
3031; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
3032; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3033; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3034; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3035; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
3036; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3037; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
3038; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3039; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3040; GFX1164-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
3041; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
3042; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3043; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3044; GFX1164-DPP-NEXT:    v_permlane64_b32 v2, v1
3045; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3046; GFX1164-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
3047; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
3048; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
3049; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
3050; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3051; GFX1164-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3052; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
3053; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
3054; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v4, exec_hi, v0
3055; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3056; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, v1
3057; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
3058; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
3059; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB5_2
3060; GFX1164-DPP-NEXT:  ; %bb.1:
3061; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
3062; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
3063; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3064; GFX1164-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
3065; GFX1164-DPP-NEXT:  .LBB5_2:
3066; GFX1164-DPP-NEXT:    s_endpgm
3067;
3068; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
3069; GFX1132-DPP:       ; %bb.0:
3070; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
3071; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
3072; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
3073; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
3074; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
3075; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
3076; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
3077; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
3078; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
3079; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
3080; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
3081; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
3082; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
3083; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
3084; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
3085; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3086; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3087; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
3088; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3089; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
3090; GFX1132-DPP-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
3091; GFX1132-DPP-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
3092; GFX1132-DPP-NEXT:    v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf
3093; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3094; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1
3095; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v2, v1
3096; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3097; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
3098; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
3099; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3100; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3101; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
3102; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3103; GFX1132-DPP-NEXT:    v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000
3104; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3105; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3106; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
3107; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v3, v3
3108; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3109; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3110; GFX1132-DPP-NEXT:    v_permlanex16_b32 v2, v1, 0, 0
3111; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3112; GFX1132-DPP-NEXT:    v_max_f32_e32 v2, v2, v2
3113; GFX1132-DPP-NEXT:    v_max_f32_e32 v1, v1, v2
3114; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
3115; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
3116; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v4, exec_lo, 0
3117; GFX1132-DPP-NEXT:    v_mov_b32_e32 v0, v1
3118; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
3119; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3120; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v4
3121; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB5_2
3122; GFX1132-DPP-NEXT:  ; %bb.1:
3123; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
3124; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, 0
3125; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3126; GFX1132-DPP-NEXT:    global_atomic_max_f32 v4, v0, s[0:1]
3127; GFX1132-DPP-NEXT:  .LBB5_2:
3128; GFX1132-DPP-NEXT:    s_endpgm
3129  %divValue = call float @div.float.value()
3130  %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue monotonic, align 4, !amdgpu.no.fine.grained.memory !1
3131  ret void
3132}
3133
3134define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
3135; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3136; GFX7LESS:       ; %bb.0:
3137; GFX7LESS-NEXT:    s_movk_i32 s32, 0x800
3138; GFX7LESS-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3139; GFX7LESS-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3140; GFX7LESS-NEXT:    s_mov_b32 s50, -1
3141; GFX7LESS-NEXT:    s_mov_b32 s51, 0xe8f000
3142; GFX7LESS-NEXT:    s_add_u32 s48, s48, s11
3143; GFX7LESS-NEXT:    s_addc_u32 s49, s49, 0
3144; GFX7LESS-NEXT:    s_mov_b64 s[40:41], s[0:1]
3145; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
3146; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
3147; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3148; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3149; GFX7LESS-NEXT:    s_cbranch_execz .LBB6_3
3150; GFX7LESS-NEXT:  ; %bb.1:
3151; GFX7LESS-NEXT:    s_mov_b32 s33, s10
3152; GFX7LESS-NEXT:    s_mov_b32 s42, s9
3153; GFX7LESS-NEXT:    s_mov_b32 s43, s8
3154; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[6:7]
3155; GFX7LESS-NEXT:    s_mov_b64 s[36:37], s[4:5]
3156; GFX7LESS-NEXT:    s_mov_b64 s[38:39], s[2:3]
3157; GFX7LESS-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
3158; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
3159; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3160; GFX7LESS-NEXT:    s_mov_b64 s[46:47], 0
3161; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
3162; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
3163; GFX7LESS-NEXT:    v_or_b32_e32 v3, v0, v1
3164; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
3165; GFX7LESS-NEXT:    v_mov_b32_e32 v0, s0
3166; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s1
3167; GFX7LESS-NEXT:    v_or_b32_e32 v40, v3, v2
3168; GFX7LESS-NEXT:  .LBB6_2: ; %atomicrmw.start
3169; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
3170; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
3171; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
3172; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
3173; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0
3174; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
3175; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
3176; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], 4.0
3177; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
3178; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
3179; GFX7LESS-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3180; GFX7LESS-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3181; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
3182; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
3183; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3184; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
3185; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 8
3186; GFX7LESS-NEXT:    v_mov_b32_e32 v1, 0
3187; GFX7LESS-NEXT:    v_mov_b32_e32 v4, 0
3188; GFX7LESS-NEXT:    v_mov_b32_e32 v5, 8
3189; GFX7LESS-NEXT:    v_mov_b32_e32 v6, 0
3190; GFX7LESS-NEXT:    v_mov_b32_e32 v7, 0
3191; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
3192; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[38:39]
3193; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
3194; GFX7LESS-NEXT:    s_mov_b32 s12, s43
3195; GFX7LESS-NEXT:    s_mov_b32 s13, s42
3196; GFX7LESS-NEXT:    s_mov_b32 s14, s33
3197; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
3198; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
3199; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
3200; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s44
3201; GFX7LESS-NEXT:    v_mov_b32_e32 v3, s45
3202; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
3203; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3204; GFX7LESS-NEXT:    v_and_b32_e32 v2, 1, v0
3205; GFX7LESS-NEXT:    buffer_load_dword v0, off, s[48:51], 0
3206; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
3207; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3208; GFX7LESS-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3209; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3210; GFX7LESS-NEXT:    s_cbranch_execnz .LBB6_2
3211; GFX7LESS-NEXT:  .LBB6_3:
3212; GFX7LESS-NEXT:    s_endpgm
3213;
3214; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3215; GFX9:       ; %bb.0:
3216; GFX9-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3217; GFX9-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3218; GFX9-NEXT:    s_mov_b32 s50, -1
3219; GFX9-NEXT:    s_mov_b32 s51, 0xe00000
3220; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3221; GFX9-NEXT:    s_add_u32 s48, s48, s11
3222; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
3223; GFX9-NEXT:    s_addc_u32 s49, s49, 0
3224; GFX9-NEXT:    s_mov_b64 s[40:41], s[0:1]
3225; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3226; GFX9-NEXT:    s_movk_i32 s32, 0x800
3227; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3228; GFX9-NEXT:    s_cbranch_execz .LBB6_3
3229; GFX9-NEXT:  ; %bb.1:
3230; GFX9-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3231; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3232; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3233; GFX9-NEXT:    s_mov_b32 s33, s10
3234; GFX9-NEXT:    s_mov_b32 s42, s9
3235; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3236; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3237; GFX9-NEXT:    s_mov_b32 s43, s8
3238; GFX9-NEXT:    s_mov_b64 s[34:35], s[6:7]
3239; GFX9-NEXT:    s_mov_b64 s[36:37], s[4:5]
3240; GFX9-NEXT:    s_mov_b64 s[38:39], s[2:3]
3241; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3242; GFX9-NEXT:    v_mov_b32_e32 v2, s1
3243; GFX9-NEXT:    s_mov_b64 s[46:47], 0
3244; GFX9-NEXT:    v_mov_b32_e32 v1, s0
3245; GFX9-NEXT:    v_or3_b32 v40, v0, v4, v3
3246; GFX9-NEXT:  .LBB6_2: ; %atomicrmw.start
3247; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3248; GFX9-NEXT:    s_waitcnt vmcnt(0)
3249; GFX9-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3250; GFX9-NEXT:    s_add_u32 s8, s36, 44
3251; GFX9-NEXT:    s_addc_u32 s9, s37, 0
3252; GFX9-NEXT:    s_getpc_b64 s[0:1]
3253; GFX9-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3254; GFX9-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3255; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3256; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
3257; GFX9-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3258; GFX9-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3259; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
3260; GFX9-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3261; GFX9-NEXT:    s_mov_b64 s[6:7], s[38:39]
3262; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
3263; GFX9-NEXT:    s_mov_b32 s12, s43
3264; GFX9-NEXT:    s_mov_b32 s13, s42
3265; GFX9-NEXT:    s_mov_b32 s14, s33
3266; GFX9-NEXT:    v_mov_b32_e32 v31, v40
3267; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
3268; GFX9-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3269; GFX9-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3270; GFX9-NEXT:    v_mov_b32_e32 v0, 8
3271; GFX9-NEXT:    v_mov_b32_e32 v1, 0
3272; GFX9-NEXT:    v_mov_b32_e32 v2, s44
3273; GFX9-NEXT:    v_mov_b32_e32 v3, s45
3274; GFX9-NEXT:    v_mov_b32_e32 v4, 0
3275; GFX9-NEXT:    v_mov_b32_e32 v5, 8
3276; GFX9-NEXT:    v_mov_b32_e32 v6, 0
3277; GFX9-NEXT:    v_mov_b32_e32 v7, 0
3278; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3279; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3280; GFX9-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3281; GFX9-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3282; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
3283; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3284; GFX9-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3285; GFX9-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3286; GFX9-NEXT:    s_cbranch_execnz .LBB6_2
3287; GFX9-NEXT:  .LBB6_3:
3288; GFX9-NEXT:    s_endpgm
3289;
3290; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3291; GFX1064:       ; %bb.0:
3292; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3293; GFX1064-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3294; GFX1064-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3295; GFX1064-NEXT:    s_mov_b32 s50, -1
3296; GFX1064-NEXT:    s_mov_b32 s51, 0x31e16000
3297; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
3298; GFX1064-NEXT:    s_add_u32 s48, s48, s11
3299; GFX1064-NEXT:    s_addc_u32 s49, s49, 0
3300; GFX1064-NEXT:    s_mov_b64 s[40:41], s[0:1]
3301; GFX1064-NEXT:    s_movk_i32 s32, 0x800
3302; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3303; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3304; GFX1064-NEXT:    s_cbranch_execz .LBB6_3
3305; GFX1064-NEXT:  ; %bb.1:
3306; GFX1064-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3307; GFX1064-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3308; GFX1064-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3309; GFX1064-NEXT:    s_mov_b32 s33, s10
3310; GFX1064-NEXT:    s_mov_b32 s42, s9
3311; GFX1064-NEXT:    s_mov_b32 s43, s8
3312; GFX1064-NEXT:    s_mov_b64 s[34:35], s[6:7]
3313; GFX1064-NEXT:    v_or3_b32 v40, v0, v4, v3
3314; GFX1064-NEXT:    s_mov_b64 s[36:37], s[4:5]
3315; GFX1064-NEXT:    s_mov_b64 s[38:39], s[2:3]
3316; GFX1064-NEXT:    s_mov_b64 s[46:47], 0
3317; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
3318; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3319; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
3320; GFX1064-NEXT:    v_mov_b32_e32 v2, s1
3321; GFX1064-NEXT:    v_mov_b32_e32 v1, s0
3322; GFX1064-NEXT:  .LBB6_2: ; %atomicrmw.start
3323; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
3324; GFX1064-NEXT:    s_waitcnt vmcnt(0)
3325; GFX1064-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3326; GFX1064-NEXT:    s_add_u32 s8, s36, 44
3327; GFX1064-NEXT:    s_addc_u32 s9, s37, 0
3328; GFX1064-NEXT:    s_getpc_b64 s[0:1]
3329; GFX1064-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3330; GFX1064-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3331; GFX1064-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3332; GFX1064-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3333; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3334; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
3335; GFX1064-NEXT:    v_mov_b32_e32 v0, 8
3336; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
3337; GFX1064-NEXT:    v_mov_b32_e32 v2, s44
3338; GFX1064-NEXT:    v_mov_b32_e32 v5, 8
3339; GFX1064-NEXT:    v_mov_b32_e32 v6, 0
3340; GFX1064-NEXT:    v_mov_b32_e32 v7, 0
3341; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
3342; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
3343; GFX1064-NEXT:    s_mov_b64 s[6:7], s[38:39]
3344; GFX1064-NEXT:    s_mov_b64 s[10:11], s[34:35]
3345; GFX1064-NEXT:    s_mov_b32 s12, s43
3346; GFX1064-NEXT:    s_mov_b32 s13, s42
3347; GFX1064-NEXT:    s_mov_b32 s14, s33
3348; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
3349; GFX1064-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3350; GFX1064-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3351; GFX1064-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3352; GFX1064-NEXT:    v_mov_b32_e32 v3, s45
3353; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
3354; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
3355; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3356; GFX1064-NEXT:    s_clause 0x1
3357; GFX1064-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3358; GFX1064-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3359; GFX1064-NEXT:    v_and_b32_e32 v0, 1, v0
3360; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3361; GFX1064-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3362; GFX1064-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3363; GFX1064-NEXT:    s_cbranch_execnz .LBB6_2
3364; GFX1064-NEXT:  .LBB6_3:
3365; GFX1064-NEXT:    s_endpgm
3366;
3367; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3368; GFX1032:       ; %bb.0:
3369; GFX1032-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3370; GFX1032-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3371; GFX1032-NEXT:    s_mov_b32 s50, -1
3372; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3373; GFX1032-NEXT:    s_mov_b32 s51, 0x31c16000
3374; GFX1032-NEXT:    s_add_u32 s48, s48, s11
3375; GFX1032-NEXT:    s_addc_u32 s49, s49, 0
3376; GFX1032-NEXT:    s_mov_b64 s[40:41], s[0:1]
3377; GFX1032-NEXT:    s_mov_b32 s46, 0
3378; GFX1032-NEXT:    s_movk_i32 s32, 0x400
3379; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
3380; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
3381; GFX1032-NEXT:    s_cbranch_execz .LBB6_3
3382; GFX1032-NEXT:  ; %bb.1:
3383; GFX1032-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3384; GFX1032-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3385; GFX1032-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3386; GFX1032-NEXT:    s_mov_b32 s33, s10
3387; GFX1032-NEXT:    s_mov_b32 s42, s9
3388; GFX1032-NEXT:    s_mov_b32 s43, s8
3389; GFX1032-NEXT:    s_mov_b64 s[34:35], s[6:7]
3390; GFX1032-NEXT:    v_or3_b32 v40, v0, v4, v3
3391; GFX1032-NEXT:    s_mov_b64 s[36:37], s[4:5]
3392; GFX1032-NEXT:    s_mov_b64 s[38:39], s[2:3]
3393; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
3394; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3395; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
3396; GFX1032-NEXT:    v_mov_b32_e32 v2, s1
3397; GFX1032-NEXT:    v_mov_b32_e32 v1, s0
3398; GFX1032-NEXT:  .LBB6_2: ; %atomicrmw.start
3399; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
3400; GFX1032-NEXT:    s_waitcnt vmcnt(0)
3401; GFX1032-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3402; GFX1032-NEXT:    s_add_u32 s8, s36, 44
3403; GFX1032-NEXT:    s_addc_u32 s9, s37, 0
3404; GFX1032-NEXT:    s_getpc_b64 s[0:1]
3405; GFX1032-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3406; GFX1032-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3407; GFX1032-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3408; GFX1032-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3409; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3410; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
3411; GFX1032-NEXT:    v_mov_b32_e32 v0, 8
3412; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
3413; GFX1032-NEXT:    v_mov_b32_e32 v2, s44
3414; GFX1032-NEXT:    v_mov_b32_e32 v5, 8
3415; GFX1032-NEXT:    v_mov_b32_e32 v6, 0
3416; GFX1032-NEXT:    v_mov_b32_e32 v7, 0
3417; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
3418; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
3419; GFX1032-NEXT:    s_mov_b64 s[6:7], s[38:39]
3420; GFX1032-NEXT:    s_mov_b64 s[10:11], s[34:35]
3421; GFX1032-NEXT:    s_mov_b32 s12, s43
3422; GFX1032-NEXT:    s_mov_b32 s13, s42
3423; GFX1032-NEXT:    s_mov_b32 s14, s33
3424; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
3425; GFX1032-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3426; GFX1032-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3427; GFX1032-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3428; GFX1032-NEXT:    v_mov_b32_e32 v3, s45
3429; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
3430; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
3431; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3432; GFX1032-NEXT:    s_clause 0x1
3433; GFX1032-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3434; GFX1032-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3435; GFX1032-NEXT:    v_and_b32_e32 v0, 1, v0
3436; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3437; GFX1032-NEXT:    s_or_b32 s46, vcc_lo, s46
3438; GFX1032-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
3439; GFX1032-NEXT:    s_cbranch_execnz .LBB6_2
3440; GFX1032-NEXT:  .LBB6_3:
3441; GFX1032-NEXT:    s_endpgm
3442;
3443; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3444; GFX1164:       ; %bb.0:
3445; GFX1164-NEXT:    v_mov_b32_e32 v40, v0
3446; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
3447; GFX1164-NEXT:    s_mov_b64 s[40:41], s[0:1]
3448; GFX1164-NEXT:    s_mov_b32 s32, 32
3449; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
3450; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3451; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
3452; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
3453; GFX1164-NEXT:    s_cbranch_execz .LBB6_3
3454; GFX1164-NEXT:  ; %bb.1:
3455; GFX1164-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
3456; GFX1164-NEXT:    s_mov_b32 s33, s10
3457; GFX1164-NEXT:    s_mov_b32 s42, s9
3458; GFX1164-NEXT:    s_mov_b32 s43, s8
3459; GFX1164-NEXT:    s_mov_b64 s[34:35], s[6:7]
3460; GFX1164-NEXT:    s_mov_b64 s[36:37], s[4:5]
3461; GFX1164-NEXT:    s_mov_b64 s[38:39], s[2:3]
3462; GFX1164-NEXT:    s_mov_b64 s[46:47], 0
3463; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
3464; GFX1164-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
3465; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
3466; GFX1164-NEXT:    v_mov_b32_e32 v2, s1
3467; GFX1164-NEXT:    v_mov_b32_e32 v1, s0
3468; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x1
3469; GFX1164-NEXT:    .p2align 6
3470; GFX1164-NEXT:  .LBB6_2: ; %atomicrmw.start
3471; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
3472; GFX1164-NEXT:    s_waitcnt vmcnt(0)
3473; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3474; GFX1164-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3475; GFX1164-NEXT:    s_add_u32 s8, s36, 44
3476; GFX1164-NEXT:    s_addc_u32 s9, s37, 0
3477; GFX1164-NEXT:    s_getpc_b64 s[0:1]
3478; GFX1164-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3479; GFX1164-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3480; GFX1164-NEXT:    v_mov_b32_e32 v31, v40
3481; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
3482; GFX1164-NEXT:    v_mov_b32_e32 v0, 8
3483; GFX1164-NEXT:    v_mov_b32_e32 v5, 8
3484; GFX1164-NEXT:    v_mov_b32_e32 v6, 0
3485; GFX1164-NEXT:    v_mov_b32_e32 v7, 0
3486; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
3487; GFX1164-NEXT:    s_mov_b64 s[6:7], s[38:39]
3488; GFX1164-NEXT:    s_mov_b64 s[10:11], s[34:35]
3489; GFX1164-NEXT:    s_mov_b32 s12, s43
3490; GFX1164-NEXT:    s_mov_b32 s13, s42
3491; GFX1164-NEXT:    s_mov_b32 s14, s33
3492; GFX1164-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3493; GFX1164-NEXT:    scratch_store_b64 off, v[1:2], off
3494; GFX1164-NEXT:    v_mov_b32_e32 v1, 0
3495; GFX1164-NEXT:    v_mov_b32_e32 v2, s44
3496; GFX1164-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
3497; GFX1164-NEXT:    v_mov_b32_e32 v3, s45
3498; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
3499; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
3500; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3501; GFX1164-NEXT:    scratch_load_b64 v[1:2], off, off
3502; GFX1164-NEXT:    v_and_b32_e32 v0, 1, v0
3503; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3504; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3505; GFX1164-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3506; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
3507; GFX1164-NEXT:    s_cbranch_execnz .LBB6_2
3508; GFX1164-NEXT:  .LBB6_3:
3509; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x2
3510; GFX1164-NEXT:    s_endpgm
3511;
3512; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3513; GFX1132:       ; %bb.0:
3514; GFX1132-NEXT:    v_mov_b32_e32 v40, v0
3515; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
3516; GFX1132-NEXT:    s_mov_b64 s[40:41], s[0:1]
3517; GFX1132-NEXT:    s_mov_b32 s46, 0
3518; GFX1132-NEXT:    s_mov_b32 s32, 32
3519; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
3520; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
3521; GFX1132-NEXT:    s_cbranch_execz .LBB6_3
3522; GFX1132-NEXT:  ; %bb.1:
3523; GFX1132-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
3524; GFX1132-NEXT:    s_mov_b32 s33, s15
3525; GFX1132-NEXT:    s_mov_b32 s42, s14
3526; GFX1132-NEXT:    s_mov_b32 s43, s13
3527; GFX1132-NEXT:    s_mov_b64 s[34:35], s[6:7]
3528; GFX1132-NEXT:    s_mov_b64 s[36:37], s[4:5]
3529; GFX1132-NEXT:    s_mov_b64 s[38:39], s[2:3]
3530; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
3531; GFX1132-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
3532; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
3533; GFX1132-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
3534; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x1
3535; GFX1132-NEXT:    .p2align 6
3536; GFX1132-NEXT:  .LBB6_2: ; %atomicrmw.start
3537; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
3538; GFX1132-NEXT:    s_waitcnt vmcnt(0)
3539; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3540; GFX1132-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3541; GFX1132-NEXT:    s_add_u32 s8, s36, 44
3542; GFX1132-NEXT:    s_addc_u32 s9, s37, 0
3543; GFX1132-NEXT:    s_getpc_b64 s[0:1]
3544; GFX1132-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3545; GFX1132-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3546; GFX1132-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
3547; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
3548; GFX1132-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
3549; GFX1132-NEXT:    v_mov_b32_e32 v7, 0
3550; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
3551; GFX1132-NEXT:    s_mov_b64 s[6:7], s[38:39]
3552; GFX1132-NEXT:    s_mov_b64 s[10:11], s[34:35]
3553; GFX1132-NEXT:    s_mov_b32 s12, s43
3554; GFX1132-NEXT:    s_mov_b32 s13, s42
3555; GFX1132-NEXT:    s_mov_b32 s14, s33
3556; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3557; GFX1132-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3558; GFX1132-NEXT:    scratch_store_b64 off, v[1:2], off
3559; GFX1132-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
3560; GFX1132-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
3561; GFX1132-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
3562; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
3563; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3564; GFX1132-NEXT:    scratch_load_b64 v[1:2], off, off
3565; GFX1132-NEXT:    v_and_b32_e32 v0, 1, v0
3566; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3567; GFX1132-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3568; GFX1132-NEXT:    s_or_b32 s46, vcc_lo, s46
3569; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
3570; GFX1132-NEXT:    s_cbranch_execnz .LBB6_2
3571; GFX1132-NEXT:  .LBB6_3:
3572; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x2
3573; GFX1132-NEXT:    s_endpgm
3574;
3575; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3576; GFX7LESS-DPP:       ; %bb.0:
3577; GFX7LESS-DPP-NEXT:    s_movk_i32 s32, 0x800
3578; GFX7LESS-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3579; GFX7LESS-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3580; GFX7LESS-DPP-NEXT:    s_mov_b32 s50, -1
3581; GFX7LESS-DPP-NEXT:    s_mov_b32 s51, 0xe8f000
3582; GFX7LESS-DPP-NEXT:    s_add_u32 s48, s48, s11
3583; GFX7LESS-DPP-NEXT:    s_addc_u32 s49, s49, 0
3584; GFX7LESS-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3585; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
3586; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
3587; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3588; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3589; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB6_3
3590; GFX7LESS-DPP-NEXT:  ; %bb.1:
3591; GFX7LESS-DPP-NEXT:    s_mov_b32 s33, s10
3592; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, s9
3593; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, s8
3594; GFX7LESS-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3595; GFX7LESS-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3596; GFX7LESS-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3597; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
3598; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3599; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3600; GFX7LESS-DPP-NEXT:    s_mov_b64 s[46:47], 0
3601; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
3602; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
3603; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v3, v0, v1
3604; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3605; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, s0
3606; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s1
3607; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v40, v3, v2
3608; GFX7LESS-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3609; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3610; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
3611; GFX7LESS-DPP-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
3612; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
3613; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0
3614; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
3615; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
3616; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], 4.0
3617; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
3618; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
3619; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3620; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3621; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
3622; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
3623; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3624; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
3625; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, 8
3626; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, 0
3627; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, 0
3628; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v5, 8
3629; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, 0
3630; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, 0
3631; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3632; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3633; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3634; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
3635; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
3636; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
3637; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
3638; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
3639; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
3640; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, s44
3641; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, s45
3642; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3643; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3644; GFX7LESS-DPP-NEXT:    v_and_b32_e32 v2, 1, v0
3645; GFX7LESS-DPP-NEXT:    buffer_load_dword v0, off, s[48:51], 0
3646; GFX7LESS-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
3647; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3648; GFX7LESS-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3649; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3650; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB6_2
3651; GFX7LESS-DPP-NEXT:  .LBB6_3:
3652; GFX7LESS-DPP-NEXT:    s_endpgm
3653;
3654; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3655; GFX9-DPP:       ; %bb.0:
3656; GFX9-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3657; GFX9-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3658; GFX9-DPP-NEXT:    s_mov_b32 s50, -1
3659; GFX9-DPP-NEXT:    s_mov_b32 s51, 0xe00000
3660; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3661; GFX9-DPP-NEXT:    s_add_u32 s48, s48, s11
3662; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
3663; GFX9-DPP-NEXT:    s_addc_u32 s49, s49, 0
3664; GFX9-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3665; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3666; GFX9-DPP-NEXT:    s_movk_i32 s32, 0x800
3667; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3668; GFX9-DPP-NEXT:    s_cbranch_execz .LBB6_3
3669; GFX9-DPP-NEXT:  ; %bb.1:
3670; GFX9-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3671; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3672; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3673; GFX9-DPP-NEXT:    s_mov_b32 s33, s10
3674; GFX9-DPP-NEXT:    s_mov_b32 s42, s9
3675; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3676; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3677; GFX9-DPP-NEXT:    s_mov_b32 s43, s8
3678; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3679; GFX9-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3680; GFX9-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3681; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3682; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s1
3683; GFX9-DPP-NEXT:    s_mov_b64 s[46:47], 0
3684; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s0
3685; GFX9-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
3686; GFX9-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3687; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3688; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
3689; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3690; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
3691; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
3692; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
3693; GFX9-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3694; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3695; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3696; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
3697; GFX9-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3698; GFX9-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3699; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3700; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3701; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3702; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3703; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
3704; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
3705; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
3706; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
3707; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
3708; GFX9-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3709; GFX9-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3710; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 8
3711; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, 0
3712; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s44
3713; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, s45
3714; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0
3715; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 8
3716; GFX9-DPP-NEXT:    v_mov_b32_e32 v6, 0
3717; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
3718; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3719; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3720; GFX9-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3721; GFX9-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3722; GFX9-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
3723; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3724; GFX9-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3725; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3726; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB6_2
3727; GFX9-DPP-NEXT:  .LBB6_3:
3728; GFX9-DPP-NEXT:    s_endpgm
3729;
3730; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3731; GFX1064-DPP:       ; %bb.0:
3732; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3733; GFX1064-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3734; GFX1064-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3735; GFX1064-DPP-NEXT:    s_mov_b32 s50, -1
3736; GFX1064-DPP-NEXT:    s_mov_b32 s51, 0x31e16000
3737; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
3738; GFX1064-DPP-NEXT:    s_add_u32 s48, s48, s11
3739; GFX1064-DPP-NEXT:    s_addc_u32 s49, s49, 0
3740; GFX1064-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3741; GFX1064-DPP-NEXT:    s_movk_i32 s32, 0x800
3742; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
3743; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
3744; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB6_3
3745; GFX1064-DPP-NEXT:  ; %bb.1:
3746; GFX1064-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3747; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3748; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3749; GFX1064-DPP-NEXT:    s_mov_b32 s33, s10
3750; GFX1064-DPP-NEXT:    s_mov_b32 s42, s9
3751; GFX1064-DPP-NEXT:    s_mov_b32 s43, s8
3752; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3753; GFX1064-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
3754; GFX1064-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3755; GFX1064-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3756; GFX1064-DPP-NEXT:    s_mov_b64 s[46:47], 0
3757; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3758; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3759; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3760; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s1
3761; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, s0
3762; GFX1064-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3763; GFX1064-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3764; GFX1064-DPP-NEXT:    s_waitcnt vmcnt(0)
3765; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3766; GFX1064-DPP-NEXT:    s_add_u32 s8, s36, 44
3767; GFX1064-DPP-NEXT:    s_addc_u32 s9, s37, 0
3768; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
3769; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3770; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3771; GFX1064-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3772; GFX1064-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3773; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3774; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
3775; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 8
3776; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
3777; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s44
3778; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 8
3779; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0
3780; GFX1064-DPP-NEXT:    v_mov_b32_e32 v7, 0
3781; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
3782; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3783; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3784; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3785; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
3786; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
3787; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
3788; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
3789; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3790; GFX1064-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3791; GFX1064-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3792; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, s45
3793; GFX1064-DPP-NEXT:    v_mov_b32_e32 v4, 0
3794; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3795; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3796; GFX1064-DPP-NEXT:    s_clause 0x1
3797; GFX1064-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3798; GFX1064-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3799; GFX1064-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
3800; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3801; GFX1064-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3802; GFX1064-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
3803; GFX1064-DPP-NEXT:    s_cbranch_execnz .LBB6_2
3804; GFX1064-DPP-NEXT:  .LBB6_3:
3805; GFX1064-DPP-NEXT:    s_endpgm
3806;
3807; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3808; GFX1032-DPP:       ; %bb.0:
3809; GFX1032-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
3810; GFX1032-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
3811; GFX1032-DPP-NEXT:    s_mov_b32 s50, -1
3812; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
3813; GFX1032-DPP-NEXT:    s_mov_b32 s51, 0x31c16000
3814; GFX1032-DPP-NEXT:    s_add_u32 s48, s48, s11
3815; GFX1032-DPP-NEXT:    s_addc_u32 s49, s49, 0
3816; GFX1032-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3817; GFX1032-DPP-NEXT:    s_mov_b32 s46, 0
3818; GFX1032-DPP-NEXT:    s_movk_i32 s32, 0x400
3819; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
3820; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
3821; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB6_3
3822; GFX1032-DPP-NEXT:  ; %bb.1:
3823; GFX1032-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
3824; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
3825; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
3826; GFX1032-DPP-NEXT:    s_mov_b32 s33, s10
3827; GFX1032-DPP-NEXT:    s_mov_b32 s42, s9
3828; GFX1032-DPP-NEXT:    s_mov_b32 s43, s8
3829; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3830; GFX1032-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
3831; GFX1032-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3832; GFX1032-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3833; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3834; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
3835; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3836; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s1
3837; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, s0
3838; GFX1032-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3839; GFX1032-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3840; GFX1032-DPP-NEXT:    s_waitcnt vmcnt(0)
3841; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3842; GFX1032-DPP-NEXT:    s_add_u32 s8, s36, 44
3843; GFX1032-DPP-NEXT:    s_addc_u32 s9, s37, 0
3844; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
3845; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3846; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3847; GFX1032-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
3848; GFX1032-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
3849; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
3850; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
3851; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 8
3852; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
3853; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s44
3854; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 8
3855; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0
3856; GFX1032-DPP-NEXT:    v_mov_b32_e32 v7, 0
3857; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
3858; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3859; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3860; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3861; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
3862; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
3863; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
3864; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
3865; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3866; GFX1032-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
3867; GFX1032-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
3868; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, s45
3869; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, 0
3870; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3871; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
3872; GFX1032-DPP-NEXT:    s_clause 0x1
3873; GFX1032-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
3874; GFX1032-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
3875; GFX1032-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
3876; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3877; GFX1032-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
3878; GFX1032-DPP-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
3879; GFX1032-DPP-NEXT:    s_cbranch_execnz .LBB6_2
3880; GFX1032-DPP-NEXT:  .LBB6_3:
3881; GFX1032-DPP-NEXT:    s_endpgm
3882;
3883; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3884; GFX1164-DPP:       ; %bb.0:
3885; GFX1164-DPP-NEXT:    v_mov_b32_e32 v40, v0
3886; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
3887; GFX1164-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3888; GFX1164-DPP-NEXT:    s_mov_b32 s32, 32
3889; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
3890; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3891; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
3892; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
3893; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB6_3
3894; GFX1164-DPP-NEXT:  ; %bb.1:
3895; GFX1164-DPP-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
3896; GFX1164-DPP-NEXT:    s_mov_b32 s33, s10
3897; GFX1164-DPP-NEXT:    s_mov_b32 s42, s9
3898; GFX1164-DPP-NEXT:    s_mov_b32 s43, s8
3899; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3900; GFX1164-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3901; GFX1164-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3902; GFX1164-DPP-NEXT:    s_mov_b64 s[46:47], 0
3903; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3904; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
3905; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3906; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s1
3907; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, s0
3908; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
3909; GFX1164-DPP-NEXT:    .p2align 6
3910; GFX1164-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3911; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3912; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
3913; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3914; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3915; GFX1164-DPP-NEXT:    s_add_u32 s8, s36, 44
3916; GFX1164-DPP-NEXT:    s_addc_u32 s9, s37, 0
3917; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
3918; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3919; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3920; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v40
3921; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
3922; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 8
3923; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 8
3924; GFX1164-DPP-NEXT:    v_mov_b32_e32 v6, 0
3925; GFX1164-DPP-NEXT:    v_mov_b32_e32 v7, 0
3926; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3927; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3928; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3929; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
3930; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
3931; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
3932; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3933; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
3934; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0
3935; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s44
3936; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
3937; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, s45
3938; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
3939; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3940; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3941; GFX1164-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
3942; GFX1164-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
3943; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3944; GFX1164-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3945; GFX1164-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
3946; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
3947; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB6_2
3948; GFX1164-DPP-NEXT:  .LBB6_3:
3949; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
3950; GFX1164-DPP-NEXT:    s_endpgm
3951;
3952; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
3953; GFX1132-DPP:       ; %bb.0:
3954; GFX1132-DPP-NEXT:    v_mov_b32_e32 v40, v0
3955; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
3956; GFX1132-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
3957; GFX1132-DPP-NEXT:    s_mov_b32 s46, 0
3958; GFX1132-DPP-NEXT:    s_mov_b32 s32, 32
3959; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
3960; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
3961; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB6_3
3962; GFX1132-DPP-NEXT:  ; %bb.1:
3963; GFX1132-DPP-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
3964; GFX1132-DPP-NEXT:    s_mov_b32 s33, s15
3965; GFX1132-DPP-NEXT:    s_mov_b32 s42, s14
3966; GFX1132-DPP-NEXT:    s_mov_b32 s43, s13
3967; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
3968; GFX1132-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
3969; GFX1132-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
3970; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3971; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
3972; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
3973; GFX1132-DPP-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
3974; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
3975; GFX1132-DPP-NEXT:    .p2align 6
3976; GFX1132-DPP-NEXT:  .LBB6_2: ; %atomicrmw.start
3977; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
3978; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
3979; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3980; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
3981; GFX1132-DPP-NEXT:    s_add_u32 s8, s36, 44
3982; GFX1132-DPP-NEXT:    s_addc_u32 s9, s37, 0
3983; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
3984; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
3985; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
3986; GFX1132-DPP-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
3987; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
3988; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
3989; GFX1132-DPP-NEXT:    v_mov_b32_e32 v7, 0
3990; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
3991; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
3992; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
3993; GFX1132-DPP-NEXT:    s_mov_b32 s12, s43
3994; GFX1132-DPP-NEXT:    s_mov_b32 s13, s42
3995; GFX1132-DPP-NEXT:    s_mov_b32 s14, s33
3996; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3997; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
3998; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
3999; GFX1132-DPP-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
4000; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
4001; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
4002; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4003; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4004; GFX1132-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
4005; GFX1132-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
4006; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
4007; GFX1132-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
4008; GFX1132-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
4009; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
4010; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB6_2
4011; GFX1132-DPP-NEXT:  .LBB6_3:
4012; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
4013; GFX1132-DPP-NEXT:    s_endpgm
4014  %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
4015  ret void
4016}
4017
4018define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
4019; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4020; GFX7LESS:       ; %bb.0:
4021; GFX7LESS-NEXT:    s_movk_i32 s32, 0x800
4022; GFX7LESS-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4023; GFX7LESS-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4024; GFX7LESS-NEXT:    s_mov_b32 s50, -1
4025; GFX7LESS-NEXT:    s_mov_b32 s51, 0xe8f000
4026; GFX7LESS-NEXT:    s_add_u32 s48, s48, s11
4027; GFX7LESS-NEXT:    s_addc_u32 s49, s49, 0
4028; GFX7LESS-NEXT:    s_mov_b32 s33, s10
4029; GFX7LESS-NEXT:    s_mov_b32 s42, s9
4030; GFX7LESS-NEXT:    s_mov_b32 s43, s8
4031; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[6:7]
4032; GFX7LESS-NEXT:    s_mov_b64 s[36:37], s[4:5]
4033; GFX7LESS-NEXT:    s_mov_b64 s[38:39], s[2:3]
4034; GFX7LESS-NEXT:    s_mov_b64 s[40:41], s[0:1]
4035; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
4036; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
4037; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
4038; GFX7LESS-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4039; GFX7LESS-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4040; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4041; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4042; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4043; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
4044; GFX7LESS-NEXT:    v_or_b32_e32 v40, v0, v2
4045; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
4046; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
4047; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
4048; GFX7LESS-NEXT:    s_mov_b32 s12, s43
4049; GFX7LESS-NEXT:    s_mov_b32 s13, s42
4050; GFX7LESS-NEXT:    s_mov_b32 s14, s33
4051; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
4052; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
4053; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
4054; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
4055; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4056; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
4057; GFX7LESS-NEXT:    v_mov_b32_e32 v2, 0
4058; GFX7LESS-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4059; GFX7LESS-NEXT:  .LBB7_1: ; %ComputeLoop
4060; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
4061; GFX7LESS-NEXT:    s_ff1_i32_b64 s4, s[0:1]
4062; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4063; GFX7LESS-NEXT:    v_readlane_b32 s3, v1, s4
4064; GFX7LESS-NEXT:    v_readlane_b32 s2, v0, s4
4065; GFX7LESS-NEXT:    s_lshl_b64 s[4:5], 1, s4
4066; GFX7LESS-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4067; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
4068; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
4069; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
4070; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4071; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB7_1
4072; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
4073; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
4074; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
4075; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4076; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
4077; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
4078; GFX7LESS-NEXT:    s_cbranch_execz .LBB7_5
4079; GFX7LESS-NEXT:  ; %bb.3:
4080; GFX7LESS-NEXT:    s_load_dwordx2 s[44:45], s[36:37], 0x9
4081; GFX7LESS-NEXT:    s_mov_b32 s47, 0xf000
4082; GFX7LESS-NEXT:    s_mov_b32 s46, -1
4083; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
4084; GFX7LESS-NEXT:    buffer_load_dwordx2 v[0:1], off, s[44:47], 0
4085; GFX7LESS-NEXT:    s_mov_b64 s[46:47], 0
4086; GFX7LESS-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4087; GFX7LESS-NEXT:  .LBB7_4: ; %atomicrmw.start
4088; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
4089; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
4090; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
4091; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
4092; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0
4093; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
4094; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
4095; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], v[41:42]
4096; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
4097; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
4098; GFX7LESS-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4099; GFX7LESS-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4100; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
4101; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
4102; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4103; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
4104; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 8
4105; GFX7LESS-NEXT:    v_mov_b32_e32 v1, 0
4106; GFX7LESS-NEXT:    v_mov_b32_e32 v4, 0
4107; GFX7LESS-NEXT:    v_mov_b32_e32 v5, 8
4108; GFX7LESS-NEXT:    v_mov_b32_e32 v6, 0
4109; GFX7LESS-NEXT:    v_mov_b32_e32 v7, 0
4110; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
4111; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[38:39]
4112; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
4113; GFX7LESS-NEXT:    s_mov_b32 s12, s43
4114; GFX7LESS-NEXT:    s_mov_b32 s13, s42
4115; GFX7LESS-NEXT:    s_mov_b32 s14, s33
4116; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
4117; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
4118; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
4119; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s44
4120; GFX7LESS-NEXT:    v_mov_b32_e32 v3, s45
4121; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
4122; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4123; GFX7LESS-NEXT:    v_and_b32_e32 v2, 1, v0
4124; GFX7LESS-NEXT:    buffer_load_dword v0, off, s[48:51], 0
4125; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
4126; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
4127; GFX7LESS-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
4128; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[46:47]
4129; GFX7LESS-NEXT:    s_cbranch_execnz .LBB7_4
4130; GFX7LESS-NEXT:  .LBB7_5:
4131; GFX7LESS-NEXT:    s_endpgm
4132;
4133; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4134; GFX9:       ; %bb.0:
4135; GFX9-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4136; GFX9-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4137; GFX9-NEXT:    s_mov_b32 s50, -1
4138; GFX9-NEXT:    s_mov_b32 s51, 0xe00000
4139; GFX9-NEXT:    s_add_u32 s48, s48, s11
4140; GFX9-NEXT:    s_addc_u32 s49, s49, 0
4141; GFX9-NEXT:    s_mov_b64 s[36:37], s[4:5]
4142; GFX9-NEXT:    s_mov_b32 s43, s8
4143; GFX9-NEXT:    s_add_u32 s8, s36, 44
4144; GFX9-NEXT:    s_mov_b32 s42, s9
4145; GFX9-NEXT:    s_addc_u32 s9, s37, 0
4146; GFX9-NEXT:    s_mov_b64 s[40:41], s[0:1]
4147; GFX9-NEXT:    s_getpc_b64 s[0:1]
4148; GFX9-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4149; GFX9-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4150; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4151; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4152; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4153; GFX9-NEXT:    s_mov_b32 s33, s10
4154; GFX9-NEXT:    s_mov_b64 s[34:35], s[6:7]
4155; GFX9-NEXT:    s_mov_b64 s[38:39], s[2:3]
4156; GFX9-NEXT:    v_or3_b32 v40, v0, v1, v2
4157; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
4158; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
4159; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
4160; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
4161; GFX9-NEXT:    s_mov_b32 s12, s43
4162; GFX9-NEXT:    s_mov_b32 s13, s42
4163; GFX9-NEXT:    s_mov_b32 s14, s33
4164; GFX9-NEXT:    v_mov_b32_e32 v31, v40
4165; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
4166; GFX9-NEXT:    s_movk_i32 s32, 0x800
4167; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4168; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4169; GFX9-NEXT:    v_mov_b32_e32 v2, 0
4170; GFX9-NEXT:    s_mov_b64 s[0:1], exec
4171; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4172; GFX9-NEXT:  .LBB7_1: ; %ComputeLoop
4173; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4174; GFX9-NEXT:    s_ff1_i32_b64 s4, s[0:1]
4175; GFX9-NEXT:    v_readlane_b32 s3, v1, s4
4176; GFX9-NEXT:    v_readlane_b32 s2, v0, s4
4177; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4178; GFX9-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4179; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s4
4180; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
4181; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
4182; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4183; GFX9-NEXT:    s_cbranch_scc1 .LBB7_1
4184; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
4185; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4186; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
4187; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4188; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
4189; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
4190; GFX9-NEXT:    s_cbranch_execz .LBB7_5
4191; GFX9-NEXT:  ; %bb.3:
4192; GFX9-NEXT:    s_load_dwordx2 s[44:45], s[36:37], 0x24
4193; GFX9-NEXT:    v_mov_b32_e32 v0, 0
4194; GFX9-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4195; GFX9-NEXT:    s_mov_b64 s[46:47], 0
4196; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4197; GFX9-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
4198; GFX9-NEXT:  .LBB7_4: ; %atomicrmw.start
4199; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4200; GFX9-NEXT:    s_waitcnt vmcnt(0)
4201; GFX9-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
4202; GFX9-NEXT:    s_add_u32 s8, s36, 44
4203; GFX9-NEXT:    s_addc_u32 s9, s37, 0
4204; GFX9-NEXT:    s_getpc_b64 s[0:1]
4205; GFX9-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4206; GFX9-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4207; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4208; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
4209; GFX9-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
4210; GFX9-NEXT:    buffer_store_dword v4, off, s[48:51], 0
4211; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
4212; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4213; GFX9-NEXT:    s_mov_b64 s[6:7], s[38:39]
4214; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
4215; GFX9-NEXT:    s_mov_b32 s12, s43
4216; GFX9-NEXT:    s_mov_b32 s13, s42
4217; GFX9-NEXT:    s_mov_b32 s14, s33
4218; GFX9-NEXT:    v_mov_b32_e32 v31, v40
4219; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
4220; GFX9-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
4221; GFX9-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
4222; GFX9-NEXT:    v_mov_b32_e32 v0, 8
4223; GFX9-NEXT:    v_mov_b32_e32 v1, 0
4224; GFX9-NEXT:    v_mov_b32_e32 v2, s44
4225; GFX9-NEXT:    v_mov_b32_e32 v3, s45
4226; GFX9-NEXT:    v_mov_b32_e32 v4, 0
4227; GFX9-NEXT:    v_mov_b32_e32 v5, 8
4228; GFX9-NEXT:    v_mov_b32_e32 v6, 0
4229; GFX9-NEXT:    v_mov_b32_e32 v7, 0
4230; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4231; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4232; GFX9-NEXT:    buffer_load_dword v4, off, s[48:51], 0
4233; GFX9-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
4234; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
4235; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
4236; GFX9-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
4237; GFX9-NEXT:    s_andn2_b64 exec, exec, s[46:47]
4238; GFX9-NEXT:    s_cbranch_execnz .LBB7_4
4239; GFX9-NEXT:  .LBB7_5:
4240; GFX9-NEXT:    s_endpgm
4241;
4242; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4243; GFX1064:       ; %bb.0:
4244; GFX1064-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4245; GFX1064-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4246; GFX1064-NEXT:    s_mov_b32 s50, -1
4247; GFX1064-NEXT:    s_mov_b32 s51, 0x31e16000
4248; GFX1064-NEXT:    s_add_u32 s48, s48, s11
4249; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
4250; GFX1064-NEXT:    s_addc_u32 s49, s49, 0
4251; GFX1064-NEXT:    s_mov_b32 s43, s8
4252; GFX1064-NEXT:    s_add_u32 s8, s34, 44
4253; GFX1064-NEXT:    s_mov_b32 s42, s9
4254; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
4255; GFX1064-NEXT:    s_mov_b64 s[40:41], s[0:1]
4256; GFX1064-NEXT:    s_getpc_b64 s[0:1]
4257; GFX1064-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4258; GFX1064-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4259; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4260; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4261; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4262; GFX1064-NEXT:    s_mov_b32 s33, s10
4263; GFX1064-NEXT:    s_mov_b64 s[36:37], s[6:7]
4264; GFX1064-NEXT:    s_mov_b64 s[38:39], s[2:3]
4265; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
4266; GFX1064-NEXT:    v_or3_b32 v40, v0, v1, v2
4267; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
4268; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
4269; GFX1064-NEXT:    s_mov_b64 s[10:11], s[36:37]
4270; GFX1064-NEXT:    s_mov_b32 s12, s43
4271; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
4272; GFX1064-NEXT:    s_mov_b32 s13, s42
4273; GFX1064-NEXT:    s_mov_b32 s14, s33
4274; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
4275; GFX1064-NEXT:    s_movk_i32 s32, 0x800
4276; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
4277; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4278; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
4279; GFX1064-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4280; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
4281; GFX1064-NEXT:  .LBB7_1: ; %ComputeLoop
4282; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
4283; GFX1064-NEXT:    s_ff1_i32_b64 s4, s[0:1]
4284; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4285; GFX1064-NEXT:    v_readlane_b32 s3, v1, s4
4286; GFX1064-NEXT:    v_readlane_b32 s2, v0, s4
4287; GFX1064-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4288; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s4
4289; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
4290; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
4291; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4292; GFX1064-NEXT:    s_cbranch_scc1 .LBB7_1
4293; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
4294; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4295; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
4296; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4297; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
4298; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
4299; GFX1064-NEXT:    s_cbranch_execz .LBB7_5
4300; GFX1064-NEXT:  ; %bb.3:
4301; GFX1064-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
4302; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
4303; GFX1064-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4304; GFX1064-NEXT:    s_mov_b64 s[46:47], 0
4305; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
4306; GFX1064-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
4307; GFX1064-NEXT:  .LBB7_4: ; %atomicrmw.start
4308; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
4309; GFX1064-NEXT:    s_waitcnt vmcnt(0)
4310; GFX1064-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
4311; GFX1064-NEXT:    s_add_u32 s8, s34, 44
4312; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
4313; GFX1064-NEXT:    s_getpc_b64 s[0:1]
4314; GFX1064-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4315; GFX1064-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4316; GFX1064-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
4317; GFX1064-NEXT:    buffer_store_dword v4, off, s[48:51], 0
4318; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4319; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
4320; GFX1064-NEXT:    v_mov_b32_e32 v2, s44
4321; GFX1064-NEXT:    v_mov_b32_e32 v3, s45
4322; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
4323; GFX1064-NEXT:    v_mov_b32_e32 v5, 8
4324; GFX1064-NEXT:    v_mov_b32_e32 v6, 0
4325; GFX1064-NEXT:    v_mov_b32_e32 v7, 0
4326; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
4327; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
4328; GFX1064-NEXT:    s_mov_b64 s[6:7], s[38:39]
4329; GFX1064-NEXT:    s_mov_b64 s[10:11], s[36:37]
4330; GFX1064-NEXT:    s_mov_b32 s12, s43
4331; GFX1064-NEXT:    s_mov_b32 s13, s42
4332; GFX1064-NEXT:    s_mov_b32 s14, s33
4333; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
4334; GFX1064-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4335; GFX1064-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
4336; GFX1064-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
4337; GFX1064-NEXT:    v_mov_b32_e32 v0, 8
4338; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
4339; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
4340; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4341; GFX1064-NEXT:    s_clause 0x1
4342; GFX1064-NEXT:    buffer_load_dword v4, off, s[48:51], 0
4343; GFX1064-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
4344; GFX1064-NEXT:    v_and_b32_e32 v0, 1, v0
4345; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
4346; GFX1064-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
4347; GFX1064-NEXT:    s_andn2_b64 exec, exec, s[46:47]
4348; GFX1064-NEXT:    s_cbranch_execnz .LBB7_4
4349; GFX1064-NEXT:  .LBB7_5:
4350; GFX1064-NEXT:    s_endpgm
4351;
4352; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4353; GFX1032:       ; %bb.0:
4354; GFX1032-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4355; GFX1032-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4356; GFX1032-NEXT:    s_mov_b32 s50, -1
4357; GFX1032-NEXT:    s_mov_b32 s51, 0x31c16000
4358; GFX1032-NEXT:    s_add_u32 s48, s48, s11
4359; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
4360; GFX1032-NEXT:    s_addc_u32 s49, s49, 0
4361; GFX1032-NEXT:    s_mov_b32 s43, s8
4362; GFX1032-NEXT:    s_add_u32 s8, s34, 44
4363; GFX1032-NEXT:    s_mov_b32 s42, s9
4364; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
4365; GFX1032-NEXT:    s_mov_b64 s[40:41], s[0:1]
4366; GFX1032-NEXT:    s_getpc_b64 s[0:1]
4367; GFX1032-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4368; GFX1032-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4369; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4370; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4371; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4372; GFX1032-NEXT:    s_mov_b32 s33, s10
4373; GFX1032-NEXT:    s_mov_b64 s[36:37], s[6:7]
4374; GFX1032-NEXT:    s_mov_b64 s[38:39], s[2:3]
4375; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
4376; GFX1032-NEXT:    v_or3_b32 v40, v0, v1, v2
4377; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
4378; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
4379; GFX1032-NEXT:    s_mov_b64 s[10:11], s[36:37]
4380; GFX1032-NEXT:    s_mov_b32 s12, s43
4381; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
4382; GFX1032-NEXT:    s_mov_b32 s13, s42
4383; GFX1032-NEXT:    s_mov_b32 s14, s33
4384; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
4385; GFX1032-NEXT:    s_movk_i32 s32, 0x400
4386; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
4387; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4388; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
4389; GFX1032-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4390; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
4391; GFX1032-NEXT:  .LBB7_1: ; %ComputeLoop
4392; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
4393; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
4394; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4395; GFX1032-NEXT:    v_readlane_b32 s3, v1, s1
4396; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
4397; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
4398; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
4399; GFX1032-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4400; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
4401; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4402; GFX1032-NEXT:    s_cbranch_scc1 .LBB7_1
4403; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
4404; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4405; GFX1032-NEXT:    s_mov_b32 s46, 0
4406; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
4407; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
4408; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
4409; GFX1032-NEXT:    s_cbranch_execz .LBB7_5
4410; GFX1032-NEXT:  ; %bb.3:
4411; GFX1032-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
4412; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
4413; GFX1032-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4414; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
4415; GFX1032-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
4416; GFX1032-NEXT:  .LBB7_4: ; %atomicrmw.start
4417; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
4418; GFX1032-NEXT:    s_waitcnt vmcnt(0)
4419; GFX1032-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
4420; GFX1032-NEXT:    s_add_u32 s8, s34, 44
4421; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
4422; GFX1032-NEXT:    s_getpc_b64 s[0:1]
4423; GFX1032-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4424; GFX1032-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4425; GFX1032-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
4426; GFX1032-NEXT:    buffer_store_dword v4, off, s[48:51], 0
4427; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4428; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
4429; GFX1032-NEXT:    v_mov_b32_e32 v2, s44
4430; GFX1032-NEXT:    v_mov_b32_e32 v3, s45
4431; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
4432; GFX1032-NEXT:    v_mov_b32_e32 v5, 8
4433; GFX1032-NEXT:    v_mov_b32_e32 v6, 0
4434; GFX1032-NEXT:    v_mov_b32_e32 v7, 0
4435; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
4436; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
4437; GFX1032-NEXT:    s_mov_b64 s[6:7], s[38:39]
4438; GFX1032-NEXT:    s_mov_b64 s[10:11], s[36:37]
4439; GFX1032-NEXT:    s_mov_b32 s12, s43
4440; GFX1032-NEXT:    s_mov_b32 s13, s42
4441; GFX1032-NEXT:    s_mov_b32 s14, s33
4442; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
4443; GFX1032-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4444; GFX1032-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
4445; GFX1032-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
4446; GFX1032-NEXT:    v_mov_b32_e32 v0, 8
4447; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
4448; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
4449; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4450; GFX1032-NEXT:    s_clause 0x1
4451; GFX1032-NEXT:    buffer_load_dword v4, off, s[48:51], 0
4452; GFX1032-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
4453; GFX1032-NEXT:    v_and_b32_e32 v0, 1, v0
4454; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
4455; GFX1032-NEXT:    s_or_b32 s46, vcc_lo, s46
4456; GFX1032-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
4457; GFX1032-NEXT:    s_cbranch_execnz .LBB7_4
4458; GFX1032-NEXT:  .LBB7_5:
4459; GFX1032-NEXT:    s_endpgm
4460;
4461; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4462; GFX1164:       ; %bb.0:
4463; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
4464; GFX1164-NEXT:    s_mov_b32 s43, s8
4465; GFX1164-NEXT:    s_add_u32 s8, s34, 44
4466; GFX1164-NEXT:    s_mov_b32 s42, s9
4467; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
4468; GFX1164-NEXT:    s_mov_b64 s[40:41], s[0:1]
4469; GFX1164-NEXT:    s_getpc_b64 s[0:1]
4470; GFX1164-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4471; GFX1164-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4472; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
4473; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4474; GFX1164-NEXT:    s_mov_b32 s33, s10
4475; GFX1164-NEXT:    s_mov_b64 s[36:37], s[6:7]
4476; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
4477; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
4478; GFX1164-NEXT:    s_mov_b64 s[10:11], s[36:37]
4479; GFX1164-NEXT:    s_mov_b32 s12, s43
4480; GFX1164-NEXT:    s_mov_b32 s13, s42
4481; GFX1164-NEXT:    s_mov_b32 s14, s33
4482; GFX1164-NEXT:    s_mov_b32 s32, 32
4483; GFX1164-NEXT:    v_mov_b32_e32 v40, v0
4484; GFX1164-NEXT:    s_mov_b64 s[38:39], s[2:3]
4485; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
4486; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4487; GFX1164-NEXT:    v_mov_b32_e32 v2, 0
4488; GFX1164-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4489; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
4490; GFX1164-NEXT:  .LBB7_1: ; %ComputeLoop
4491; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
4492; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4493; GFX1164-NEXT:    s_ctz_i32_b64 s4, s[0:1]
4494; GFX1164-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4495; GFX1164-NEXT:    v_readlane_b32 s3, v1, s4
4496; GFX1164-NEXT:    v_readlane_b32 s2, v0, s4
4497; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
4498; GFX1164-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4499; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s4
4500; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
4501; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4502; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
4503; GFX1164-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4504; GFX1164-NEXT:    s_cbranch_scc1 .LBB7_1
4505; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
4506; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4507; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
4508; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4509; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
4510; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
4511; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
4512; GFX1164-NEXT:    s_cbranch_execz .LBB7_5
4513; GFX1164-NEXT:  ; %bb.3:
4514; GFX1164-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
4515; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
4516; GFX1164-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4517; GFX1164-NEXT:    s_mov_b64 s[46:47], 0
4518; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
4519; GFX1164-NEXT:    global_load_b64 v[4:5], v0, s[44:45]
4520; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x1
4521; GFX1164-NEXT:    .p2align 6
4522; GFX1164-NEXT:  .LBB7_4: ; %atomicrmw.start
4523; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
4524; GFX1164-NEXT:    s_waitcnt vmcnt(0)
4525; GFX1164-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
4526; GFX1164-NEXT:    s_add_u32 s8, s34, 44
4527; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
4528; GFX1164-NEXT:    s_getpc_b64 s[0:1]
4529; GFX1164-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4530; GFX1164-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4531; GFX1164-NEXT:    v_mov_b32_e32 v31, v40
4532; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4533; GFX1164-NEXT:    v_mov_b32_e32 v2, s44
4534; GFX1164-NEXT:    v_mov_b32_e32 v3, s45
4535; GFX1164-NEXT:    v_mov_b32_e32 v6, 0
4536; GFX1164-NEXT:    v_mov_b32_e32 v7, 0
4537; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
4538; GFX1164-NEXT:    s_mov_b64 s[6:7], s[38:39]
4539; GFX1164-NEXT:    s_mov_b64 s[10:11], s[36:37]
4540; GFX1164-NEXT:    s_mov_b32 s12, s43
4541; GFX1164-NEXT:    s_mov_b32 s13, s42
4542; GFX1164-NEXT:    s_mov_b32 s14, s33
4543; GFX1164-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4544; GFX1164-NEXT:    scratch_store_b64 off, v[4:5], off
4545; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
4546; GFX1164-NEXT:    v_mov_b32_e32 v5, 8
4547; GFX1164-NEXT:    scratch_store_b64 off, v[0:1], off offset:8
4548; GFX1164-NEXT:    v_mov_b32_e32 v0, 8
4549; GFX1164-NEXT:    v_mov_b32_e32 v1, 0
4550; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
4551; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4552; GFX1164-NEXT:    scratch_load_b64 v[4:5], off, off
4553; GFX1164-NEXT:    v_and_b32_e32 v0, 1, v0
4554; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
4555; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
4556; GFX1164-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
4557; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
4558; GFX1164-NEXT:    s_cbranch_execnz .LBB7_4
4559; GFX1164-NEXT:  .LBB7_5:
4560; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x2
4561; GFX1164-NEXT:    s_endpgm
4562;
4563; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4564; GFX1132:       ; %bb.0:
4565; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
4566; GFX1132-NEXT:    s_mov_b64 s[40:41], s[0:1]
4567; GFX1132-NEXT:    s_add_u32 s8, s34, 44
4568; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
4569; GFX1132-NEXT:    s_getpc_b64 s[0:1]
4570; GFX1132-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4571; GFX1132-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4572; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
4573; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4574; GFX1132-NEXT:    s_mov_b64 s[36:37], s[6:7]
4575; GFX1132-NEXT:    s_mov_b32 s42, s14
4576; GFX1132-NEXT:    s_mov_b32 s43, s13
4577; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
4578; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
4579; GFX1132-NEXT:    s_mov_b64 s[10:11], s[36:37]
4580; GFX1132-NEXT:    s_mov_b32 s12, s13
4581; GFX1132-NEXT:    s_mov_b32 s13, s14
4582; GFX1132-NEXT:    s_mov_b32 s14, s15
4583; GFX1132-NEXT:    s_mov_b32 s32, 32
4584; GFX1132-NEXT:    s_mov_b32 s33, s15
4585; GFX1132-NEXT:    v_mov_b32_e32 v40, v0
4586; GFX1132-NEXT:    s_mov_b64 s[38:39], s[2:3]
4587; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
4588; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4589; GFX1132-NEXT:    v_mov_b32_e32 v2, 0
4590; GFX1132-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
4591; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
4592; GFX1132-NEXT:  .LBB7_1: ; %ComputeLoop
4593; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
4594; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4595; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
4596; GFX1132-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
4597; GFX1132-NEXT:    v_readlane_b32 s3, v1, s1
4598; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
4599; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
4600; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4601; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
4602; GFX1132-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
4603; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
4604; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4605; GFX1132-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
4606; GFX1132-NEXT:    s_cbranch_scc1 .LBB7_1
4607; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
4608; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4609; GFX1132-NEXT:    s_mov_b32 s46, 0
4610; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
4611; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4612; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
4613; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
4614; GFX1132-NEXT:    s_cbranch_execz .LBB7_5
4615; GFX1132-NEXT:  ; %bb.3:
4616; GFX1132-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
4617; GFX1132-NEXT:    v_mov_b32_e32 v0, 0
4618; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_4)
4619; GFX1132-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
4620; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
4621; GFX1132-NEXT:    global_load_b64 v[4:5], v0, s[44:45]
4622; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x1
4623; GFX1132-NEXT:    .p2align 6
4624; GFX1132-NEXT:  .LBB7_4: ; %atomicrmw.start
4625; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
4626; GFX1132-NEXT:    s_waitcnt vmcnt(0)
4627; GFX1132-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
4628; GFX1132-NEXT:    s_add_u32 s8, s34, 44
4629; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
4630; GFX1132-NEXT:    s_getpc_b64 s[0:1]
4631; GFX1132-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4632; GFX1132-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4633; GFX1132-NEXT:    v_mov_b32_e32 v31, v40
4634; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4635; GFX1132-NEXT:    v_mov_b32_e32 v3, s45
4636; GFX1132-NEXT:    v_mov_b32_e32 v7, 0
4637; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
4638; GFX1132-NEXT:    s_mov_b64 s[6:7], s[38:39]
4639; GFX1132-NEXT:    s_mov_b64 s[10:11], s[36:37]
4640; GFX1132-NEXT:    s_mov_b32 s12, s43
4641; GFX1132-NEXT:    s_mov_b32 s13, s42
4642; GFX1132-NEXT:    s_mov_b32 s14, s33
4643; GFX1132-NEXT:    v_mov_b32_e32 v6, 0
4644; GFX1132-NEXT:    v_mov_b32_e32 v2, s44
4645; GFX1132-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4646; GFX1132-NEXT:    scratch_store_b64 off, v[4:5], off
4647; GFX1132-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
4648; GFX1132-NEXT:    scratch_store_b64 off, v[0:1], off offset:8
4649; GFX1132-NEXT:    v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
4650; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
4651; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4652; GFX1132-NEXT:    scratch_load_b64 v[4:5], off, off
4653; GFX1132-NEXT:    v_and_b32_e32 v0, 1, v0
4654; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
4655; GFX1132-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
4656; GFX1132-NEXT:    s_or_b32 s46, vcc_lo, s46
4657; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
4658; GFX1132-NEXT:    s_cbranch_execnz .LBB7_4
4659; GFX1132-NEXT:  .LBB7_5:
4660; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x2
4661; GFX1132-NEXT:    s_endpgm
4662;
4663; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4664; GFX7LESS-DPP:       ; %bb.0:
4665; GFX7LESS-DPP-NEXT:    s_movk_i32 s32, 0x800
4666; GFX7LESS-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4667; GFX7LESS-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4668; GFX7LESS-DPP-NEXT:    s_mov_b32 s50, -1
4669; GFX7LESS-DPP-NEXT:    s_mov_b32 s51, 0xe8f000
4670; GFX7LESS-DPP-NEXT:    s_add_u32 s48, s48, s11
4671; GFX7LESS-DPP-NEXT:    s_addc_u32 s49, s49, 0
4672; GFX7LESS-DPP-NEXT:    s_mov_b32 s33, s10
4673; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, s9
4674; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, s8
4675; GFX7LESS-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
4676; GFX7LESS-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
4677; GFX7LESS-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
4678; GFX7LESS-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
4679; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
4680; GFX7LESS-DPP-NEXT:    s_mov_b32 s47, 0xf000
4681; GFX7LESS-DPP-NEXT:    s_mov_b32 s46, -1
4682; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
4683; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
4684; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
4685; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4686; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4687; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4688; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4689; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4690; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
4691; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v40, v0, v2
4692; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
4693; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
4694; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
4695; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
4696; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
4697; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
4698; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
4699; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
4700; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
4701; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4702; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4703; GFX7LESS-DPP-NEXT:    buffer_load_dwordx2 v[2:3], off, s[44:47], 0
4704; GFX7LESS-DPP-NEXT:    s_mov_b64 s[46:47], 0
4705; GFX7LESS-DPP-NEXT:    v_max_f64 v[41:42], v[0:1], v[0:1]
4706; GFX7LESS-DPP-NEXT:  .LBB7_1: ; %atomicrmw.start
4707; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
4708; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
4709; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
4710; GFX7LESS-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:4
4711; GFX7LESS-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0
4712; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
4713; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
4714; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
4715; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
4716; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4717; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4718; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
4719; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
4720; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4721; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
4722; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, 8
4723; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, 0
4724; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, 0
4725; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v5, 8
4726; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, 0
4727; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, 0
4728; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
4729; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
4730; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
4731; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
4732; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
4733; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
4734; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
4735; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
4736; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
4737; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, s44
4738; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, s45
4739; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4740; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4741; GFX7LESS-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
4742; GFX7LESS-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0
4743; GFX7LESS-DPP-NEXT:    buffer_load_dword v3, off, s[48:51], 0 offset:4
4744; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
4745; GFX7LESS-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
4746; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
4747; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB7_1
4748; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
4749; GFX7LESS-DPP-NEXT:    s_endpgm
4750;
4751; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4752; GFX9-DPP:       ; %bb.0:
4753; GFX9-DPP-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
4754; GFX9-DPP-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
4755; GFX9-DPP-NEXT:    s_mov_b32 s54, -1
4756; GFX9-DPP-NEXT:    s_mov_b32 s55, 0xe00000
4757; GFX9-DPP-NEXT:    s_add_u32 s52, s52, s11
4758; GFX9-DPP-NEXT:    s_addc_u32 s53, s53, 0
4759; GFX9-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
4760; GFX9-DPP-NEXT:    s_mov_b32 s43, s8
4761; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
4762; GFX9-DPP-NEXT:    s_mov_b32 s42, s9
4763; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
4764; GFX9-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
4765; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
4766; GFX9-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4767; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4768; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4769; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4770; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4771; GFX9-DPP-NEXT:    s_mov_b32 s33, s10
4772; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
4773; GFX9-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
4774; GFX9-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
4775; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
4776; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[52:53]
4777; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
4778; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
4779; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
4780; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
4781; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
4782; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
4783; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[54:55]
4784; GFX9-DPP-NEXT:    s_movk_i32 s32, 0x800
4785; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4786; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4787; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
4788; GFX9-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
4789; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v11, v9, v1, s[0:1]
4790; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
4791; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
4792; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
4793; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0
4794; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
4795; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
4796; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4797; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
4798; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
4799; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
4800; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
4801; GFX9-DPP-NEXT:    s_nop 0
4802; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
4803; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
4804; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
4805; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
4806; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
4807; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
4808; GFX9-DPP-NEXT:    s_nop 0
4809; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
4810; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
4811; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
4812; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
4813; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
4814; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
4815; GFX9-DPP-NEXT:    s_nop 0
4816; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
4817; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
4818; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
4819; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
4820; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
4821; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
4822; GFX9-DPP-NEXT:    s_nop 0
4823; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
4824; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
4825; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
4826; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
4827; GFX9-DPP-NEXT:    s_nop 1
4828; GFX9-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
4829; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
4830; GFX9-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
4831; GFX9-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
4832; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
4833; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 0
4834; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
4835; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
4836; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
4837; GFX9-DPP-NEXT:    v_readlane_b32 s45, v9, 63
4838; GFX9-DPP-NEXT:    v_readlane_b32 s44, v8, 63
4839; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
4840; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
4841; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
4842; GFX9-DPP-NEXT:    s_cbranch_execz .LBB7_3
4843; GFX9-DPP-NEXT:  ; %bb.1:
4844; GFX9-DPP-NEXT:    s_load_dwordx2 s[46:47], s[36:37], 0x24
4845; GFX9-DPP-NEXT:    s_mov_b64 s[48:49], 0
4846; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4847; GFX9-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[46:47]
4848; GFX9-DPP-NEXT:  .LBB7_2: ; %atomicrmw.start
4849; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
4850; GFX9-DPP-NEXT:    v_max_f64 v[3:4], s[44:45], s[44:45]
4851; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
4852; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[1:2], v[1:2]
4853; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
4854; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
4855; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
4856; GFX9-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4857; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4858; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4859; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[52:53]
4860; GFX9-DPP-NEXT:    buffer_store_dword v2, off, s[52:55], 0 offset:4
4861; GFX9-DPP-NEXT:    buffer_store_dword v1, off, s[52:55], 0
4862; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
4863; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
4864; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
4865; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
4866; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
4867; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
4868; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
4869; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
4870; GFX9-DPP-NEXT:    buffer_store_dword v4, off, s[52:55], 0 offset:12
4871; GFX9-DPP-NEXT:    buffer_store_dword v3, off, s[52:55], 0 offset:8
4872; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[54:55]
4873; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 8
4874; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, 0
4875; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s46
4876; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, s47
4877; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0
4878; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 8
4879; GFX9-DPP-NEXT:    v_mov_b32_e32 v6, 0
4880; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
4881; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4882; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4883; GFX9-DPP-NEXT:    buffer_load_dword v1, off, s[52:55], 0
4884; GFX9-DPP-NEXT:    buffer_load_dword v2, off, s[52:55], 0 offset:4
4885; GFX9-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
4886; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
4887; GFX9-DPP-NEXT:    s_or_b64 s[48:49], vcc, s[48:49]
4888; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[48:49]
4889; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB7_2
4890; GFX9-DPP-NEXT:  .LBB7_3:
4891; GFX9-DPP-NEXT:    s_endpgm
4892;
4893; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
4894; GFX1064-DPP:       ; %bb.0:
4895; GFX1064-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
4896; GFX1064-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
4897; GFX1064-DPP-NEXT:    s_mov_b32 s50, -1
4898; GFX1064-DPP-NEXT:    s_mov_b32 s51, 0x31e16000
4899; GFX1064-DPP-NEXT:    s_add_u32 s48, s48, s11
4900; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
4901; GFX1064-DPP-NEXT:    s_addc_u32 s49, s49, 0
4902; GFX1064-DPP-NEXT:    s_mov_b32 s43, s8
4903; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
4904; GFX1064-DPP-NEXT:    s_mov_b32 s42, s9
4905; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
4906; GFX1064-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
4907; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
4908; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
4909; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
4910; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
4911; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4912; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
4913; GFX1064-DPP-NEXT:    s_mov_b32 s33, s10
4914; GFX1064-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
4915; GFX1064-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
4916; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
4917; GFX1064-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
4918; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
4919; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
4920; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
4921; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
4922; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
4923; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
4924; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
4925; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
4926; GFX1064-DPP-NEXT:    s_movk_i32 s32, 0x800
4927; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4928; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
4929; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
4930; GFX1064-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
4931; GFX1064-DPP-NEXT:    v_mov_b32_e32 v8, 0
4932; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1]
4933; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
4934; GFX1064-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
4935; GFX1064-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
4936; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4937; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
4938; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
4939; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
4940; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
4941; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
4942; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
4943; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4944; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
4945; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
4946; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
4947; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
4948; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
4949; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4950; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
4951; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
4952; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
4953; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
4954; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
4955; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4956; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
4957; GFX1064-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
4958; GFX1064-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
4959; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
4960; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
4961; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v9, 0
4962; GFX1064-DPP-NEXT:    v_readlane_b32 s5, v9, 32
4963; GFX1064-DPP-NEXT:    v_readlane_b32 s4, v8, 32
4964; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v8, 0
4965; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], s[4:5], s[4:5]
4966; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], s[2:3], s[2:3]
4967; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
4968; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
4969; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
4970; GFX1064-DPP-NEXT:    v_mov_b32_e32 v41, v8
4971; GFX1064-DPP-NEXT:    v_mov_b32_e32 v42, v9
4972; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
4973; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
4974; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
4975; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
4976; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB7_3
4977; GFX1064-DPP-NEXT:  ; %bb.1:
4978; GFX1064-DPP-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
4979; GFX1064-DPP-NEXT:    s_mov_b64 s[46:47], 0
4980; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
4981; GFX1064-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[44:45]
4982; GFX1064-DPP-NEXT:  .LBB7_2: ; %atomicrmw.start
4983; GFX1064-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
4984; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[41:42], v[41:42]
4985; GFX1064-DPP-NEXT:    s_waitcnt vmcnt(0)
4986; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[1:2], v[1:2]
4987; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
4988; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
4989; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
4990; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
4991; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
4992; GFX1064-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
4993; GFX1064-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
4994; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
4995; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
4996; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 8
4997; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
4998; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s44
4999; GFX1064-DPP-NEXT:    v_mov_b32_e32 v7, 0
5000; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
5001; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5002; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
5003; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5004; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
5005; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
5006; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
5007; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
5008; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
5009; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 8
5010; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0
5011; GFX1064-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
5012; GFX1064-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
5013; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, s45
5014; GFX1064-DPP-NEXT:    v_mov_b32_e32 v4, 0
5015; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5016; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5017; GFX1064-DPP-NEXT:    s_clause 0x1
5018; GFX1064-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
5019; GFX1064-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
5020; GFX1064-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
5021; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
5022; GFX1064-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
5023; GFX1064-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
5024; GFX1064-DPP-NEXT:    s_cbranch_execnz .LBB7_2
5025; GFX1064-DPP-NEXT:  .LBB7_3:
5026; GFX1064-DPP-NEXT:    s_endpgm
5027;
5028; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
5029; GFX1032-DPP:       ; %bb.0:
5030; GFX1032-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
5031; GFX1032-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
5032; GFX1032-DPP-NEXT:    s_mov_b32 s50, -1
5033; GFX1032-DPP-NEXT:    s_mov_b32 s51, 0x31c16000
5034; GFX1032-DPP-NEXT:    s_add_u32 s48, s48, s11
5035; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
5036; GFX1032-DPP-NEXT:    s_addc_u32 s49, s49, 0
5037; GFX1032-DPP-NEXT:    s_mov_b32 s43, s8
5038; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
5039; GFX1032-DPP-NEXT:    s_mov_b32 s42, s9
5040; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
5041; GFX1032-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
5042; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
5043; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
5044; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
5045; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
5046; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
5047; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
5048; GFX1032-DPP-NEXT:    s_mov_b32 s33, s10
5049; GFX1032-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
5050; GFX1032-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
5051; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
5052; GFX1032-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
5053; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
5054; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5055; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5056; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
5057; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
5058; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
5059; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
5060; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
5061; GFX1032-DPP-NEXT:    s_movk_i32 s32, 0x400
5062; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5063; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5064; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
5065; GFX1032-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
5066; GFX1032-DPP-NEXT:    v_mov_b32_e32 v8, 0
5067; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0
5068; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s0
5069; GFX1032-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
5070; GFX1032-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
5071; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5072; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
5073; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
5074; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5075; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
5076; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
5077; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
5078; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5079; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5080; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5081; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
5082; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
5083; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
5084; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5085; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5086; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5087; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
5088; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
5089; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
5090; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5091; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5092; GFX1032-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
5093; GFX1032-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
5094; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5095; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5096; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
5097; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, v8
5098; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
5099; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
5100; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, v9
5101; GFX1032-DPP-NEXT:    s_mov_b32 s46, 0
5102; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
5103; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
5104; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB7_3
5105; GFX1032-DPP-NEXT:  ; %bb.1:
5106; GFX1032-DPP-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
5107; GFX1032-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
5108; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5109; GFX1032-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[44:45]
5110; GFX1032-DPP-NEXT:  .LBB7_2: ; %atomicrmw.start
5111; GFX1032-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5112; GFX1032-DPP-NEXT:    s_waitcnt vmcnt(0)
5113; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
5114; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
5115; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
5116; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
5117; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
5118; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
5119; GFX1032-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
5120; GFX1032-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
5121; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
5122; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
5123; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 8
5124; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
5125; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s44
5126; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 8
5127; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0
5128; GFX1032-DPP-NEXT:    v_mov_b32_e32 v7, 0
5129; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
5130; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5131; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
5132; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5133; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
5134; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
5135; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
5136; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
5137; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
5138; GFX1032-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
5139; GFX1032-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
5140; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, s45
5141; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, 0
5142; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5143; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5144; GFX1032-DPP-NEXT:    s_clause 0x1
5145; GFX1032-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
5146; GFX1032-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
5147; GFX1032-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
5148; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
5149; GFX1032-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
5150; GFX1032-DPP-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
5151; GFX1032-DPP-NEXT:    s_cbranch_execnz .LBB7_2
5152; GFX1032-DPP-NEXT:  .LBB7_3:
5153; GFX1032-DPP-NEXT:    s_endpgm
5154;
5155; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
5156; GFX1164-DPP:       ; %bb.0:
5157; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
5158; GFX1164-DPP-NEXT:    s_mov_b32 s43, s8
5159; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
5160; GFX1164-DPP-NEXT:    s_mov_b32 s42, s9
5161; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
5162; GFX1164-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
5163; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
5164; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
5165; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
5166; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
5167; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5168; GFX1164-DPP-NEXT:    s_mov_b32 s33, s10
5169; GFX1164-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
5170; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5171; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
5172; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5173; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
5174; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
5175; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
5176; GFX1164-DPP-NEXT:    s_mov_b32 s32, 32
5177; GFX1164-DPP-NEXT:    v_mov_b32_e32 v40, v0
5178; GFX1164-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
5179; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5180; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5181; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
5182; GFX1164-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
5183; GFX1164-DPP-NEXT:    v_mov_b32_e32 v8, 0
5184; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1]
5185; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
5186; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5187; GFX1164-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
5188; GFX1164-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
5189; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5190; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5191; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
5192; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
5193; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5194; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
5195; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5196; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
5197; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
5198; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5199; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5200; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5201; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5202; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
5203; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5204; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
5205; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
5206; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5207; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5208; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5209; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
5210; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
5211; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5212; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
5213; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
5214; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5215; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5216; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5217; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5218; GFX1164-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
5219; GFX1164-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
5220; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5221; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5222; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5223; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5224; GFX1164-DPP-NEXT:    v_permlane64_b32 v11, v9
5225; GFX1164-DPP-NEXT:    v_permlane64_b32 v10, v8
5226; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5227; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5228; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5229; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
5230; GFX1164-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5231; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5232; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, v8
5233; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5234; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, v9
5235; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
5236; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
5237; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
5238; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5239; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v1
5240; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB7_3
5241; GFX1164-DPP-NEXT:  ; %bb.1:
5242; GFX1164-DPP-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
5243; GFX1164-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
5244; GFX1164-DPP-NEXT:    s_mov_b64 s[46:47], 0
5245; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5246; GFX1164-DPP-NEXT:    global_load_b64 v[1:2], v0, s[44:45]
5247; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
5248; GFX1164-DPP-NEXT:    .p2align 6
5249; GFX1164-DPP-NEXT:  .LBB7_2: ; %atomicrmw.start
5250; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5251; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
5252; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
5253; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
5254; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
5255; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
5256; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
5257; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
5258; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v40
5259; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5260; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 8
5261; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 8
5262; GFX1164-DPP-NEXT:    v_mov_b32_e32 v6, 0
5263; GFX1164-DPP-NEXT:    v_mov_b32_e32 v7, 0
5264; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5265; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
5266; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5267; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
5268; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
5269; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
5270; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
5271; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
5272; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0
5273; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s44
5274; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
5275; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, s45
5276; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
5277; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5278; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5279; GFX1164-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
5280; GFX1164-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
5281; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
5282; GFX1164-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
5283; GFX1164-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
5284; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
5285; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB7_2
5286; GFX1164-DPP-NEXT:  .LBB7_3:
5287; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
5288; GFX1164-DPP-NEXT:    s_endpgm
5289;
5290; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
5291; GFX1132-DPP:       ; %bb.0:
5292; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
5293; GFX1132-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
5294; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
5295; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
5296; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
5297; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
5298; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
5299; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
5300; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5301; GFX1132-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
5302; GFX1132-DPP-NEXT:    s_mov_b32 s42, s14
5303; GFX1132-DPP-NEXT:    s_mov_b32 s43, s13
5304; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5305; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
5306; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5307; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
5308; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
5309; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
5310; GFX1132-DPP-NEXT:    s_mov_b32 s32, 32
5311; GFX1132-DPP-NEXT:    s_mov_b32 s33, s15
5312; GFX1132-DPP-NEXT:    v_mov_b32_e32 v40, v0
5313; GFX1132-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
5314; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5315; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5316; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
5317; GFX1132-DPP-NEXT:    v_dual_mov_b32 v9, 0x7ff80000 :: v_dual_mov_b32 v8, 0
5318; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0
5319; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s0
5320; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5321; GFX1132-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
5322; GFX1132-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
5323; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5324; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5325; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
5326; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
5327; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
5328; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5329; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
5330; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
5331; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5332; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5333; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5334; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
5335; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5336; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
5337; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
5338; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5339; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5340; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5341; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
5342; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5343; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
5344; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
5345; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5346; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5347; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5348; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5349; GFX1132-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
5350; GFX1132-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
5351; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5352; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
5353; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
5354; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
5355; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5356; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
5357; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
5358; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, v9
5359; GFX1132-DPP-NEXT:    s_mov_b32 s46, 0
5360; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
5361; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5362; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v1
5363; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB7_3
5364; GFX1132-DPP-NEXT:  ; %bb.1:
5365; GFX1132-DPP-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
5366; GFX1132-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
5367; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5368; GFX1132-DPP-NEXT:    global_load_b64 v[1:2], v0, s[44:45]
5369; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
5370; GFX1132-DPP-NEXT:    .p2align 6
5371; GFX1132-DPP-NEXT:  .LBB7_2: ; %atomicrmw.start
5372; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5373; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
5374; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
5375; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
5376; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
5377; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
5378; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
5379; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
5380; GFX1132-DPP-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
5381; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5382; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
5383; GFX1132-DPP-NEXT:    v_mov_b32_e32 v7, 0
5384; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
5385; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
5386; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
5387; GFX1132-DPP-NEXT:    s_mov_b32 s12, s43
5388; GFX1132-DPP-NEXT:    s_mov_b32 s13, s42
5389; GFX1132-DPP-NEXT:    s_mov_b32 s14, s33
5390; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_4)
5391; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
5392; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
5393; GFX1132-DPP-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
5394; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
5395; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
5396; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5397; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5398; GFX1132-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
5399; GFX1132-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
5400; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
5401; GFX1132-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
5402; GFX1132-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
5403; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
5404; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB7_2
5405; GFX1132-DPP-NEXT:  .LBB7_3:
5406; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
5407; GFX1132-DPP-NEXT:    s_endpgm
5408  %divValue = call double @div.double.value()
5409  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
5410  ret void
5411}
5412
5413define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
5414; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5415; GFX7LESS:       ; %bb.0:
5416; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
5417; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
5418; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5419; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5420; GFX7LESS-NEXT:    s_cbranch_execz .LBB8_3
5421; GFX7LESS-NEXT:  ; %bb.1:
5422; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
5423; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
5424; GFX7LESS-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
5425; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
5426; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
5427; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
5428; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s6
5429; GFX7LESS-NEXT:    v_mov_b32_e32 v3, s7
5430; GFX7LESS-NEXT:    s_mov_b32 s2, -1
5431; GFX7LESS-NEXT:  .LBB8_2: ; %atomicrmw.start
5432; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
5433; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5434; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5435; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
5436; GFX7LESS-NEXT:    v_mov_b32_e32 v7, v3
5437; GFX7LESS-NEXT:    v_mov_b32_e32 v6, v2
5438; GFX7LESS-NEXT:    v_mov_b32_e32 v5, v1
5439; GFX7LESS-NEXT:    v_mov_b32_e32 v4, v0
5440; GFX7LESS-NEXT:    buffer_atomic_cmpswap_x2 v[4:7], off, s[0:3], 0 glc
5441; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
5442; GFX7LESS-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3]
5443; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5444; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v4
5445; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v5
5446; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5447; GFX7LESS-NEXT:    s_cbranch_execnz .LBB8_2
5448; GFX7LESS-NEXT:  .LBB8_3:
5449; GFX7LESS-NEXT:    s_endpgm
5450;
5451; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5452; GFX9:       ; %bb.0:
5453; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5454; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5455; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5456; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5457; GFX9-NEXT:    s_cbranch_execz .LBB8_3
5458; GFX9-NEXT:  ; %bb.1:
5459; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5460; GFX9-NEXT:    s_mov_b64 s[2:3], 0
5461; GFX9-NEXT:    v_mov_b32_e32 v4, 0
5462; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5463; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
5464; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5465; GFX9-NEXT:    v_mov_b32_e32 v2, s4
5466; GFX9-NEXT:    v_mov_b32_e32 v3, s5
5467; GFX9-NEXT:  .LBB8_2: ; %atomicrmw.start
5468; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5469; GFX9-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5470; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5471; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
5472; GFX9-NEXT:    s_waitcnt vmcnt(0)
5473; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5474; GFX9-NEXT:    v_mov_b32_e32 v3, v1
5475; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
5476; GFX9-NEXT:    v_mov_b32_e32 v2, v0
5477; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
5478; GFX9-NEXT:    s_cbranch_execnz .LBB8_2
5479; GFX9-NEXT:  .LBB8_3:
5480; GFX9-NEXT:    s_endpgm
5481;
5482; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5483; GFX1064:       ; %bb.0:
5484; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5485; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5486; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5487; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5488; GFX1064-NEXT:    s_cbranch_execz .LBB8_2
5489; GFX1064-NEXT:  ; %bb.1:
5490; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5491; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
5492; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x40100000
5493; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
5494; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
5495; GFX1064-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
5496; GFX1064-NEXT:  .LBB8_2:
5497; GFX1064-NEXT:    s_endpgm
5498;
5499; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5500; GFX1032:       ; %bb.0:
5501; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5502; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
5503; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
5504; GFX1032-NEXT:    s_cbranch_execz .LBB8_2
5505; GFX1032-NEXT:  ; %bb.1:
5506; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5507; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
5508; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x40100000
5509; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
5510; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
5511; GFX1032-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
5512; GFX1032-NEXT:  .LBB8_2:
5513; GFX1032-NEXT:    s_endpgm
5514;
5515; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5516; GFX1164:       ; %bb.0:
5517; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5518; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
5519; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5520; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5521; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
5522; GFX1164-NEXT:    s_cbranch_execz .LBB8_3
5523; GFX1164-NEXT:  ; %bb.1:
5524; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
5525; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
5526; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
5527; GFX1164-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
5528; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
5529; GFX1164-NEXT:    v_mov_b32_e32 v2, s2
5530; GFX1164-NEXT:    v_mov_b32_e32 v3, s3
5531; GFX1164-NEXT:    s_mov_b64 s[2:3], 0
5532; GFX1164-NEXT:  .LBB8_2: ; %atomicrmw.start
5533; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
5534; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5535; GFX1164-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5536; GFX1164-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5537; GFX1164-NEXT:    global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
5538; GFX1164-NEXT:    s_waitcnt vmcnt(0)
5539; GFX1164-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5540; GFX1164-NEXT:    v_mov_b32_e32 v3, v1
5541; GFX1164-NEXT:    v_mov_b32_e32 v2, v0
5542; GFX1164-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
5543; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5544; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[2:3]
5545; GFX1164-NEXT:    s_cbranch_execnz .LBB8_2
5546; GFX1164-NEXT:  .LBB8_3:
5547; GFX1164-NEXT:    s_endpgm
5548;
5549; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5550; GFX1132:       ; %bb.0:
5551; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5552; GFX1132-NEXT:    s_mov_b32 s2, 0
5553; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
5554; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5555; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
5556; GFX1132-NEXT:    s_cbranch_execz .LBB8_3
5557; GFX1132-NEXT:  ; %bb.1:
5558; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
5559; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
5560; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
5561; GFX1132-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
5562; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
5563; GFX1132-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
5564; GFX1132-NEXT:  .LBB8_2: ; %atomicrmw.start
5565; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
5566; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5567; GFX1132-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5568; GFX1132-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5569; GFX1132-NEXT:    global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
5570; GFX1132-NEXT:    s_waitcnt vmcnt(0)
5571; GFX1132-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
5572; GFX1132-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
5573; GFX1132-NEXT:    s_or_b32 s2, vcc_lo, s2
5574; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5575; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s2
5576; GFX1132-NEXT:    s_cbranch_execnz .LBB8_2
5577; GFX1132-NEXT:  .LBB8_3:
5578; GFX1132-NEXT:    s_endpgm
5579;
5580; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5581; GFX7LESS-DPP:       ; %bb.0:
5582; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
5583; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
5584; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5585; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5586; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB8_3
5587; GFX7LESS-DPP-NEXT:  ; %bb.1:
5588; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
5589; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5590; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
5591; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
5592; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
5593; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5594; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, s6
5595; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, s7
5596; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
5597; GFX7LESS-DPP-NEXT:  .LBB8_2: ; %atomicrmw.start
5598; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5599; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5600; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5601; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
5602; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, v3
5603; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, v2
5604; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v5, v1
5605; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, v0
5606; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap_x2 v[4:7], off, s[0:3], 0 glc
5607; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
5608; GFX7LESS-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3]
5609; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5610; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v4
5611; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v5
5612; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5613; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB8_2
5614; GFX7LESS-DPP-NEXT:  .LBB8_3:
5615; GFX7LESS-DPP-NEXT:    s_endpgm
5616;
5617; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5618; GFX9-DPP:       ; %bb.0:
5619; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5620; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5621; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5622; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5623; GFX9-DPP-NEXT:    s_cbranch_execz .LBB8_3
5624; GFX9-DPP-NEXT:  ; %bb.1:
5625; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5626; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
5627; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0
5628; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5629; GFX9-DPP-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
5630; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5631; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s4
5632; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, s5
5633; GFX9-DPP-NEXT:  .LBB8_2: ; %atomicrmw.start
5634; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5635; GFX9-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5636; GFX9-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5637; GFX9-DPP-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
5638; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
5639; GFX9-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5640; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v1
5641; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
5642; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, v0
5643; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
5644; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB8_2
5645; GFX9-DPP-NEXT:  .LBB8_3:
5646; GFX9-DPP-NEXT:    s_endpgm
5647;
5648; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5649; GFX1064-DPP:       ; %bb.0:
5650; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5651; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5652; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5653; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5654; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB8_2
5655; GFX1064-DPP-NEXT:  ; %bb.1:
5656; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5657; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
5658; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0x40100000
5659; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, 0
5660; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5661; GFX1064-DPP-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
5662; GFX1064-DPP-NEXT:  .LBB8_2:
5663; GFX1064-DPP-NEXT:    s_endpgm
5664;
5665; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5666; GFX1032-DPP:       ; %bb.0:
5667; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5668; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
5669; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
5670; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB8_2
5671; GFX1032-DPP-NEXT:  ; %bb.1:
5672; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
5673; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
5674; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0x40100000
5675; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, 0
5676; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5677; GFX1032-DPP-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
5678; GFX1032-DPP-NEXT:  .LBB8_2:
5679; GFX1032-DPP-NEXT:    s_endpgm
5680;
5681; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5682; GFX1164-DPP:       ; %bb.0:
5683; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5684; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
5685; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5686; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5687; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
5688; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB8_3
5689; GFX1164-DPP-NEXT:  ; %bb.1:
5690; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
5691; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
5692; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5693; GFX1164-DPP-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
5694; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5695; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s2
5696; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, s3
5697; GFX1164-DPP-NEXT:    s_mov_b64 s[2:3], 0
5698; GFX1164-DPP-NEXT:  .LBB8_2: ; %atomicrmw.start
5699; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5700; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5701; GFX1164-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5702; GFX1164-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5703; GFX1164-DPP-NEXT:    global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
5704; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
5705; GFX1164-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5706; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, v1
5707; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, v0
5708; GFX1164-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
5709; GFX1164-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5710; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[2:3]
5711; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB8_2
5712; GFX1164-DPP-NEXT:  .LBB8_3:
5713; GFX1164-DPP-NEXT:    s_endpgm
5714;
5715; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe:
5716; GFX1132-DPP:       ; %bb.0:
5717; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5718; GFX1132-DPP-NEXT:    s_mov_b32 s2, 0
5719; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
5720; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5721; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
5722; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB8_3
5723; GFX1132-DPP-NEXT:  ; %bb.1:
5724; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
5725; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, 0
5726; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5727; GFX1132-DPP-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
5728; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
5729; GFX1132-DPP-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
5730; GFX1132-DPP-NEXT:  .LBB8_2: ; %atomicrmw.start
5731; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
5732; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5733; GFX1132-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5734; GFX1132-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], 4.0
5735; GFX1132-DPP-NEXT:    global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc
5736; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
5737; GFX1132-DPP-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
5738; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
5739; GFX1132-DPP-NEXT:    s_or_b32 s2, vcc_lo, s2
5740; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5741; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s2
5742; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB8_2
5743; GFX1132-DPP-NEXT:  .LBB8_3:
5744; GFX1132-DPP-NEXT:    s_endpgm
5745  %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1
5746  ret void
5747}
5748
5749define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
5750; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
5751; GFX7LESS:       ; %bb.0:
5752; GFX7LESS-NEXT:    s_mov_b32 s32, 0
5753; GFX7LESS-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5754; GFX7LESS-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5755; GFX7LESS-NEXT:    s_mov_b32 s38, -1
5756; GFX7LESS-NEXT:    s_mov_b32 s39, 0xe8f000
5757; GFX7LESS-NEXT:    s_add_u32 s36, s36, s11
5758; GFX7LESS-NEXT:    s_addc_u32 s37, s37, 0
5759; GFX7LESS-NEXT:    s_mov_b32 s14, s10
5760; GFX7LESS-NEXT:    s_mov_b32 s13, s9
5761; GFX7LESS-NEXT:    s_mov_b32 s12, s8
5762; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[6:7]
5763; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[4:5]
5764; GFX7LESS-NEXT:    s_add_u32 s8, s34, 44
5765; GFX7LESS-NEXT:    s_addc_u32 s9, s35, 0
5766; GFX7LESS-NEXT:    s_getpc_b64 s[4:5]
5767; GFX7LESS-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
5768; GFX7LESS-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
5769; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
5770; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
5771; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
5772; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
5773; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
5774; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
5775; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
5776; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
5777; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[38:39]
5778; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
5779; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5780; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
5781; GFX7LESS-NEXT:    v_mov_b32_e32 v4, 0
5782; GFX7LESS-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
5783; GFX7LESS-NEXT:  .LBB9_1: ; %ComputeLoop
5784; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
5785; GFX7LESS-NEXT:    s_ff1_i32_b64 s4, s[0:1]
5786; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
5787; GFX7LESS-NEXT:    v_readlane_b32 s3, v1, s4
5788; GFX7LESS-NEXT:    v_readlane_b32 s2, v0, s4
5789; GFX7LESS-NEXT:    s_lshl_b64 s[4:5], 1, s4
5790; GFX7LESS-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
5791; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
5792; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
5793; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
5794; GFX7LESS-NEXT:    v_max_f64 v[4:5], v[2:3], v[4:5]
5795; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB9_1
5796; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
5797; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
5798; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
5799; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5800; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5801; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
5802; GFX7LESS-NEXT:    s_cbranch_execz .LBB9_5
5803; GFX7LESS-NEXT:  ; %bb.3:
5804; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x9
5805; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
5806; GFX7LESS-NEXT:    s_mov_b32 s2, -1
5807; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
5808; GFX7LESS-NEXT:    buffer_load_dwordx2 v[2:3], off, s[0:3], 0
5809; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
5810; GFX7LESS-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
5811; GFX7LESS-NEXT:  .LBB9_4: ; %atomicrmw.start
5812; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
5813; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
5814; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5815; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
5816; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
5817; GFX7LESS-NEXT:    v_mov_b32_e32 v9, v3
5818; GFX7LESS-NEXT:    v_mov_b32_e32 v8, v2
5819; GFX7LESS-NEXT:    v_mov_b32_e32 v7, v1
5820; GFX7LESS-NEXT:    v_mov_b32_e32 v6, v0
5821; GFX7LESS-NEXT:    buffer_atomic_cmpswap_x2 v[6:9], off, s[0:3], 0 glc
5822; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
5823; GFX7LESS-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
5824; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5825; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v6
5826; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v7
5827; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5828; GFX7LESS-NEXT:    s_cbranch_execnz .LBB9_4
5829; GFX7LESS-NEXT:  .LBB9_5:
5830; GFX7LESS-NEXT:    s_endpgm
5831;
5832; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
5833; GFX9:       ; %bb.0:
5834; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5835; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5836; GFX9-NEXT:    s_mov_b32 s38, -1
5837; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
5838; GFX9-NEXT:    s_add_u32 s36, s36, s11
5839; GFX9-NEXT:    s_addc_u32 s37, s37, 0
5840; GFX9-NEXT:    s_mov_b64 s[34:35], s[4:5]
5841; GFX9-NEXT:    s_mov_b32 s12, s8
5842; GFX9-NEXT:    s_add_u32 s8, s34, 44
5843; GFX9-NEXT:    s_mov_b32 s13, s9
5844; GFX9-NEXT:    s_addc_u32 s9, s35, 0
5845; GFX9-NEXT:    s_getpc_b64 s[4:5]
5846; GFX9-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
5847; GFX9-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
5848; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
5849; GFX9-NEXT:    s_mov_b32 s14, s10
5850; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
5851; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
5852; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
5853; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
5854; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
5855; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
5856; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
5857; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
5858; GFX9-NEXT:    s_mov_b32 s32, 0
5859; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5860; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5861; GFX9-NEXT:    v_mov_b32_e32 v4, 0
5862; GFX9-NEXT:    s_mov_b64 s[0:1], exec
5863; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
5864; GFX9-NEXT:  .LBB9_1: ; %ComputeLoop
5865; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5866; GFX9-NEXT:    s_ff1_i32_b64 s4, s[0:1]
5867; GFX9-NEXT:    v_readlane_b32 s3, v1, s4
5868; GFX9-NEXT:    v_readlane_b32 s2, v0, s4
5869; GFX9-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
5870; GFX9-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
5871; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s4
5872; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
5873; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
5874; GFX9-NEXT:    v_max_f64 v[4:5], v[2:3], v[4:5]
5875; GFX9-NEXT:    s_cbranch_scc1 .LBB9_1
5876; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
5877; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5878; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5879; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5880; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5881; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
5882; GFX9-NEXT:    s_cbranch_execz .LBB9_5
5883; GFX9-NEXT:  ; %bb.3:
5884; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
5885; GFX9-NEXT:    v_mov_b32_e32 v6, 0
5886; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
5887; GFX9-NEXT:    s_mov_b64 s[2:3], 0
5888; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5889; GFX9-NEXT:    global_load_dwordx2 v[2:3], v6, s[0:1]
5890; GFX9-NEXT:  .LBB9_4: ; %atomicrmw.start
5891; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5892; GFX9-NEXT:    s_waitcnt vmcnt(0)
5893; GFX9-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
5894; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
5895; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
5896; GFX9-NEXT:    s_waitcnt vmcnt(0)
5897; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5898; GFX9-NEXT:    v_mov_b32_e32 v3, v1
5899; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
5900; GFX9-NEXT:    v_mov_b32_e32 v2, v0
5901; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
5902; GFX9-NEXT:    s_cbranch_execnz .LBB9_4
5903; GFX9-NEXT:  .LBB9_5:
5904; GFX9-NEXT:    s_endpgm
5905;
5906; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
5907; GFX1064:       ; %bb.0:
5908; GFX1064-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5909; GFX1064-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5910; GFX1064-NEXT:    s_mov_b32 s38, -1
5911; GFX1064-NEXT:    s_mov_b32 s39, 0x31e16000
5912; GFX1064-NEXT:    s_add_u32 s36, s36, s11
5913; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
5914; GFX1064-NEXT:    s_addc_u32 s37, s37, 0
5915; GFX1064-NEXT:    s_mov_b32 s12, s8
5916; GFX1064-NEXT:    s_add_u32 s8, s34, 44
5917; GFX1064-NEXT:    s_mov_b32 s13, s9
5918; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
5919; GFX1064-NEXT:    s_getpc_b64 s[4:5]
5920; GFX1064-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
5921; GFX1064-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
5922; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
5923; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
5924; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
5925; GFX1064-NEXT:    s_mov_b32 s14, s10
5926; GFX1064-NEXT:    s_mov_b64 s[10:11], s[6:7]
5927; GFX1064-NEXT:    s_mov_b64 s[4:5], s[0:1]
5928; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
5929; GFX1064-NEXT:    v_or3_b32 v31, v0, v1, v2
5930; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
5931; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
5932; GFX1064-NEXT:    s_mov_b32 s32, 0
5933; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
5934; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5935; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
5936; GFX1064-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
5937; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
5938; GFX1064-NEXT:  .LBB9_1: ; %ComputeLoop
5939; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
5940; GFX1064-NEXT:    s_ff1_i32_b64 s4, s[0:1]
5941; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
5942; GFX1064-NEXT:    v_readlane_b32 s3, v1, s4
5943; GFX1064-NEXT:    v_readlane_b32 s2, v0, s4
5944; GFX1064-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
5945; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s4
5946; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
5947; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
5948; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
5949; GFX1064-NEXT:    s_cbranch_scc1 .LBB9_1
5950; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
5951; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
5952; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
5953; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
5954; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
5955; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
5956; GFX1064-NEXT:    s_cbranch_execz .LBB9_4
5957; GFX1064-NEXT:  ; %bb.3:
5958; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
5959; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
5960; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
5961; GFX1064-NEXT:    global_atomic_fmax_x2 v0, v[2:3], s[0:1]
5962; GFX1064-NEXT:  .LBB9_4:
5963; GFX1064-NEXT:    s_endpgm
5964;
5965; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
5966; GFX1032:       ; %bb.0:
5967; GFX1032-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5968; GFX1032-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5969; GFX1032-NEXT:    s_mov_b32 s38, -1
5970; GFX1032-NEXT:    s_mov_b32 s39, 0x31c16000
5971; GFX1032-NEXT:    s_add_u32 s36, s36, s11
5972; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
5973; GFX1032-NEXT:    s_addc_u32 s37, s37, 0
5974; GFX1032-NEXT:    s_mov_b32 s12, s8
5975; GFX1032-NEXT:    s_add_u32 s8, s34, 44
5976; GFX1032-NEXT:    s_mov_b32 s13, s9
5977; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
5978; GFX1032-NEXT:    s_getpc_b64 s[4:5]
5979; GFX1032-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
5980; GFX1032-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
5981; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
5982; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
5983; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
5984; GFX1032-NEXT:    s_mov_b32 s14, s10
5985; GFX1032-NEXT:    s_mov_b64 s[10:11], s[6:7]
5986; GFX1032-NEXT:    s_mov_b64 s[4:5], s[0:1]
5987; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
5988; GFX1032-NEXT:    v_or3_b32 v31, v0, v1, v2
5989; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
5990; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
5991; GFX1032-NEXT:    s_mov_b32 s32, 0
5992; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
5993; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
5994; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
5995; GFX1032-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
5996; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
5997; GFX1032-NEXT:  .LBB9_1: ; %ComputeLoop
5998; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
5999; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
6000; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
6001; GFX1032-NEXT:    v_readlane_b32 s3, v1, s1
6002; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
6003; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
6004; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
6005; GFX1032-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
6006; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
6007; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6008; GFX1032-NEXT:    s_cbranch_scc1 .LBB9_1
6009; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
6010; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6011; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
6012; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
6013; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
6014; GFX1032-NEXT:    s_cbranch_execz .LBB9_4
6015; GFX1032-NEXT:  ; %bb.3:
6016; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
6017; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
6018; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
6019; GFX1032-NEXT:    global_atomic_fmax_x2 v0, v[2:3], s[0:1]
6020; GFX1032-NEXT:  .LBB9_4:
6021; GFX1032-NEXT:    s_endpgm
6022;
6023; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6024; GFX1164:       ; %bb.0:
6025; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
6026; GFX1164-NEXT:    s_mov_b32 s12, s8
6027; GFX1164-NEXT:    s_add_u32 s8, s34, 44
6028; GFX1164-NEXT:    s_mov_b32 s13, s9
6029; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
6030; GFX1164-NEXT:    s_getpc_b64 s[4:5]
6031; GFX1164-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6032; GFX1164-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6033; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
6034; GFX1164-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
6035; GFX1164-NEXT:    s_mov_b32 s14, s10
6036; GFX1164-NEXT:    s_mov_b64 s[10:11], s[6:7]
6037; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
6038; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
6039; GFX1164-NEXT:    s_mov_b32 s32, 0
6040; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
6041; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6042; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
6043; GFX1164-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
6044; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
6045; GFX1164-NEXT:  .LBB9_1: ; %ComputeLoop
6046; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
6047; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6048; GFX1164-NEXT:    s_ctz_i32_b64 s4, s[0:1]
6049; GFX1164-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
6050; GFX1164-NEXT:    v_readlane_b32 s3, v1, s4
6051; GFX1164-NEXT:    v_readlane_b32 s2, v0, s4
6052; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
6053; GFX1164-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
6054; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s4
6055; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
6056; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6057; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
6058; GFX1164-NEXT:    v_max_f64 v[4:5], v[2:3], v[4:5]
6059; GFX1164-NEXT:    s_cbranch_scc1 .LBB9_1
6060; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
6061; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6062; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
6063; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6064; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
6065; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
6066; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
6067; GFX1164-NEXT:    s_cbranch_execz .LBB9_5
6068; GFX1164-NEXT:  ; %bb.3:
6069; GFX1164-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
6070; GFX1164-NEXT:    v_mov_b32_e32 v6, 0
6071; GFX1164-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6072; GFX1164-NEXT:    s_mov_b64 s[2:3], 0
6073; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
6074; GFX1164-NEXT:    global_load_b64 v[2:3], v6, s[0:1]
6075; GFX1164-NEXT:  .LBB9_4: ; %atomicrmw.start
6076; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
6077; GFX1164-NEXT:    s_waitcnt vmcnt(0)
6078; GFX1164-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
6079; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6080; GFX1164-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
6081; GFX1164-NEXT:    global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
6082; GFX1164-NEXT:    s_waitcnt vmcnt(0)
6083; GFX1164-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6084; GFX1164-NEXT:    v_mov_b32_e32 v3, v1
6085; GFX1164-NEXT:    v_mov_b32_e32 v2, v0
6086; GFX1164-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
6087; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6088; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[2:3]
6089; GFX1164-NEXT:    s_cbranch_execnz .LBB9_4
6090; GFX1164-NEXT:  .LBB9_5:
6091; GFX1164-NEXT:    s_endpgm
6092;
6093; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6094; GFX1132:       ; %bb.0:
6095; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
6096; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
6097; GFX1132-NEXT:    s_add_u32 s8, s34, 44
6098; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
6099; GFX1132-NEXT:    s_getpc_b64 s[4:5]
6100; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6101; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6102; GFX1132-NEXT:    s_mov_b32 s12, s13
6103; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
6104; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
6105; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
6106; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
6107; GFX1132-NEXT:    s_mov_b32 s13, s14
6108; GFX1132-NEXT:    s_mov_b32 s14, s15
6109; GFX1132-NEXT:    s_mov_b32 s32, 0
6110; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
6111; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6112; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
6113; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
6114; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
6115; GFX1132-NEXT:  .LBB9_1: ; %ComputeLoop
6116; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
6117; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6118; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
6119; GFX1132-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
6120; GFX1132-NEXT:    v_readlane_b32 s3, v1, s1
6121; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
6122; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
6123; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6124; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
6125; GFX1132-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
6126; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
6127; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6128; GFX1132-NEXT:    v_max_f64 v[4:5], v[2:3], v[4:5]
6129; GFX1132-NEXT:    s_cbranch_scc1 .LBB9_1
6130; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
6131; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6132; GFX1132-NEXT:    s_mov_b32 s2, 0
6133; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
6134; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6135; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
6136; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
6137; GFX1132-NEXT:    s_cbranch_execz .LBB9_5
6138; GFX1132-NEXT:  ; %bb.3:
6139; GFX1132-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
6140; GFX1132-NEXT:    v_mov_b32_e32 v6, 0
6141; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_4)
6142; GFX1132-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6143; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
6144; GFX1132-NEXT:    global_load_b64 v[2:3], v6, s[0:1]
6145; GFX1132-NEXT:  .LBB9_4: ; %atomicrmw.start
6146; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
6147; GFX1132-NEXT:    s_waitcnt vmcnt(0)
6148; GFX1132-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
6149; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6150; GFX1132-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
6151; GFX1132-NEXT:    global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc
6152; GFX1132-NEXT:    s_waitcnt vmcnt(0)
6153; GFX1132-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
6154; GFX1132-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
6155; GFX1132-NEXT:    s_or_b32 s2, vcc_lo, s2
6156; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6157; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s2
6158; GFX1132-NEXT:    s_cbranch_execnz .LBB9_4
6159; GFX1132-NEXT:  .LBB9_5:
6160; GFX1132-NEXT:    s_endpgm
6161;
6162; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6163; GFX7LESS-DPP:       ; %bb.0:
6164; GFX7LESS-DPP-NEXT:    s_mov_b32 s32, 0
6165; GFX7LESS-DPP-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
6166; GFX7LESS-DPP-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
6167; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, -1
6168; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, 0xe8f000
6169; GFX7LESS-DPP-NEXT:    s_add_u32 s40, s40, s11
6170; GFX7LESS-DPP-NEXT:    s_addc_u32 s41, s41, 0
6171; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s10
6172; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s9
6173; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s8
6174; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6175; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
6176; GFX7LESS-DPP-NEXT:    s_mov_b32 s39, 0xf000
6177; GFX7LESS-DPP-NEXT:    s_mov_b32 s38, -1
6178; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s4, 44
6179; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s5, 0
6180; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[4:5]
6181; GFX7LESS-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6182; GFX7LESS-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6183; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
6184; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
6185; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
6186; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
6187; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
6188; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6189; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6190; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[40:41]
6191; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[42:43]
6192; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6193; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6194; GFX7LESS-DPP-NEXT:    buffer_load_dwordx2 v[2:3], off, s[36:39], 0
6195; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], 0
6196; GFX7LESS-DPP-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
6197; GFX7LESS-DPP-NEXT:  .LBB9_1: ; %atomicrmw.start
6198; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
6199; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
6200; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
6201; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
6202; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
6203; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v9, v3
6204; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v8, v2
6205; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, v1
6206; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, v0
6207; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
6208; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
6209; GFX7LESS-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
6210; GFX7LESS-DPP-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
6211; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v6
6212; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v7
6213; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[0:1]
6214; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB9_1
6215; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
6216; GFX7LESS-DPP-NEXT:    s_endpgm
6217;
6218; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6219; GFX9-DPP:       ; %bb.0:
6220; GFX9-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
6221; GFX9-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
6222; GFX9-DPP-NEXT:    s_mov_b32 s38, -1
6223; GFX9-DPP-NEXT:    s_mov_b32 s39, 0xe00000
6224; GFX9-DPP-NEXT:    s_add_u32 s36, s36, s11
6225; GFX9-DPP-NEXT:    s_addc_u32 s37, s37, 0
6226; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
6227; GFX9-DPP-NEXT:    s_mov_b32 s12, s8
6228; GFX9-DPP-NEXT:    s_add_u32 s8, s34, 44
6229; GFX9-DPP-NEXT:    s_mov_b32 s13, s9
6230; GFX9-DPP-NEXT:    s_addc_u32 s9, s35, 0
6231; GFX9-DPP-NEXT:    s_getpc_b64 s[4:5]
6232; GFX9-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6233; GFX9-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6234; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
6235; GFX9-DPP-NEXT:    s_mov_b32 s14, s10
6236; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6237; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
6238; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
6239; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6240; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6241; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
6242; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
6243; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
6244; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
6245; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6246; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6247; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
6248; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
6249; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v6, v4, v1, s[0:1]
6250; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[0:1]
6251; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
6252; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
6253; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0
6254; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v6 row_shr:1 row_mask:0xf bank_mask:0xf
6255; GFX9-DPP-NEXT:    v_mov_b32_dpp v7, v5 row_shr:1 row_mask:0xf bank_mask:0xf
6256; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6257; GFX9-DPP-NEXT:    v_max_f64 v[7:8], v[7:8], v[7:8]
6258; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[7:8]
6259; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
6260; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
6261; GFX9-DPP-NEXT:    s_nop 0
6262; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v6 row_shr:2 row_mask:0xf bank_mask:0xf
6263; GFX9-DPP-NEXT:    v_mov_b32_dpp v7, v5 row_shr:2 row_mask:0xf bank_mask:0xf
6264; GFX9-DPP-NEXT:    v_max_f64 v[7:8], v[7:8], v[7:8]
6265; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[7:8]
6266; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
6267; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
6268; GFX9-DPP-NEXT:    s_nop 0
6269; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v6 row_shr:4 row_mask:0xf bank_mask:0xf
6270; GFX9-DPP-NEXT:    v_mov_b32_dpp v7, v5 row_shr:4 row_mask:0xf bank_mask:0xf
6271; GFX9-DPP-NEXT:    v_max_f64 v[7:8], v[7:8], v[7:8]
6272; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[7:8]
6273; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
6274; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
6275; GFX9-DPP-NEXT:    s_nop 0
6276; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v6 row_shr:8 row_mask:0xf bank_mask:0xf
6277; GFX9-DPP-NEXT:    v_mov_b32_dpp v7, v5 row_shr:8 row_mask:0xf bank_mask:0xf
6278; GFX9-DPP-NEXT:    v_max_f64 v[7:8], v[7:8], v[7:8]
6279; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[7:8]
6280; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
6281; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
6282; GFX9-DPP-NEXT:    s_nop 0
6283; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v6 row_bcast:15 row_mask:0xa bank_mask:0xf
6284; GFX9-DPP-NEXT:    v_mov_b32_dpp v7, v5 row_bcast:15 row_mask:0xa bank_mask:0xf
6285; GFX9-DPP-NEXT:    v_max_f64 v[7:8], v[7:8], v[7:8]
6286; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[7:8]
6287; GFX9-DPP-NEXT:    s_nop 1
6288; GFX9-DPP-NEXT:    v_mov_b32_dpp v4, v6 row_bcast:31 row_mask:0xc bank_mask:0xf
6289; GFX9-DPP-NEXT:    v_mov_b32_dpp v3, v5 row_bcast:31 row_mask:0xc bank_mask:0xf
6290; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[3:4]
6291; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
6292; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
6293; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 0
6294; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
6295; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
6296; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
6297; GFX9-DPP-NEXT:    v_readlane_b32 s3, v4, 63
6298; GFX9-DPP-NEXT:    v_readlane_b32 s2, v3, 63
6299; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
6300; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[2:3]
6301; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
6302; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[2:3], vcc
6303; GFX9-DPP-NEXT:    s_cbranch_execz .LBB9_3
6304; GFX9-DPP-NEXT:  ; %bb.1:
6305; GFX9-DPP-NEXT:    s_load_dwordx2 s[2:3], s[34:35], 0x24
6306; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], 0
6307; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6308; GFX9-DPP-NEXT:    global_load_dwordx2 v[11:12], v0, s[2:3]
6309; GFX9-DPP-NEXT:  .LBB9_2: ; %atomicrmw.start
6310; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
6311; GFX9-DPP-NEXT:    v_max_f64 v[1:2], s[0:1], s[0:1]
6312; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
6313; GFX9-DPP-NEXT:    v_max_f64 v[9:10], v[11:12], v[11:12]
6314; GFX9-DPP-NEXT:    v_max_f64 v[9:10], v[9:10], v[1:2]
6315; GFX9-DPP-NEXT:    global_atomic_cmpswap_x2 v[1:2], v0, v[9:12], s[2:3] glc
6316; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
6317; GFX9-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[1:2], v[11:12]
6318; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, v2
6319; GFX9-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6320; GFX9-DPP-NEXT:    v_mov_b32_e32 v11, v1
6321; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6322; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB9_2
6323; GFX9-DPP-NEXT:  .LBB9_3:
6324; GFX9-DPP-NEXT:    s_endpgm
6325;
6326; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6327; GFX1064-DPP:       ; %bb.0:
6328; GFX1064-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
6329; GFX1064-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
6330; GFX1064-DPP-NEXT:    s_mov_b32 s38, -1
6331; GFX1064-DPP-NEXT:    s_mov_b32 s39, 0x31e16000
6332; GFX1064-DPP-NEXT:    s_add_u32 s36, s36, s11
6333; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
6334; GFX1064-DPP-NEXT:    s_addc_u32 s37, s37, 0
6335; GFX1064-DPP-NEXT:    s_mov_b32 s12, s8
6336; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
6337; GFX1064-DPP-NEXT:    s_mov_b32 s13, s9
6338; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
6339; GFX1064-DPP-NEXT:    s_getpc_b64 s[4:5]
6340; GFX1064-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6341; GFX1064-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6342; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
6343; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
6344; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
6345; GFX1064-DPP-NEXT:    s_mov_b32 s14, s10
6346; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6347; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6348; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6349; GFX1064-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
6350; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
6351; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
6352; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
6353; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6354; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6355; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
6356; GFX1064-DPP-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
6357; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, 0
6358; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v6, 0x7ff80000, v1, s[0:1]
6359; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[0:1]
6360; GFX1064-DPP-NEXT:    v_mov_b32_dpp v4, v6 row_xmask:1 row_mask:0xf bank_mask:0xf
6361; GFX1064-DPP-NEXT:    v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf
6362; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6363; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[3:4]
6364; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
6365; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6366; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0
6367; GFX1064-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:2 row_mask:0xf bank_mask:0xf
6368; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
6369; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6370; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6371; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6372; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0
6373; GFX1064-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:4 row_mask:0xf bank_mask:0xf
6374; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
6375; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6376; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6377; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6378; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 0
6379; GFX1064-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:8 row_mask:0xf bank_mask:0xf
6380; GFX1064-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
6381; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6382; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6383; GFX1064-DPP-NEXT:    v_permlanex16_b32 v6, v4, 0, 0
6384; GFX1064-DPP-NEXT:    v_permlanex16_b32 v5, v3, 0, 0
6385; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6386; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6387; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v4, 0
6388; GFX1064-DPP-NEXT:    v_readlane_b32 s5, v4, 32
6389; GFX1064-DPP-NEXT:    v_readlane_b32 s4, v3, 32
6390; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v3, 0
6391; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], s[4:5], s[4:5]
6392; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], s[2:3], s[2:3]
6393; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
6394; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
6395; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6396; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, 0
6397; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v7, exec_hi, v0
6398; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, v3
6399; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, v4
6400; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6401; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
6402; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB9_2
6403; GFX1064-DPP-NEXT:  ; %bb.1:
6404; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
6405; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6406; GFX1064-DPP-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
6407; GFX1064-DPP-NEXT:  .LBB9_2:
6408; GFX1064-DPP-NEXT:    s_endpgm
6409;
6410; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6411; GFX1032-DPP:       ; %bb.0:
6412; GFX1032-DPP-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
6413; GFX1032-DPP-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
6414; GFX1032-DPP-NEXT:    s_mov_b32 s38, -1
6415; GFX1032-DPP-NEXT:    s_mov_b32 s39, 0x31c16000
6416; GFX1032-DPP-NEXT:    s_add_u32 s36, s36, s11
6417; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
6418; GFX1032-DPP-NEXT:    s_addc_u32 s37, s37, 0
6419; GFX1032-DPP-NEXT:    s_mov_b32 s12, s8
6420; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
6421; GFX1032-DPP-NEXT:    s_mov_b32 s13, s9
6422; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
6423; GFX1032-DPP-NEXT:    s_getpc_b64 s[4:5]
6424; GFX1032-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6425; GFX1032-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6426; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
6427; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
6428; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
6429; GFX1032-DPP-NEXT:    s_mov_b32 s14, s10
6430; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6431; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6432; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6433; GFX1032-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
6434; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
6435; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
6436; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
6437; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6438; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6439; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
6440; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
6441; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, 0
6442; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v6, 0x7ff80000, v1, s0
6443; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s0
6444; GFX1032-DPP-NEXT:    v_mov_b32_dpp v4, v6 row_xmask:1 row_mask:0xf bank_mask:0xf
6445; GFX1032-DPP-NEXT:    v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf
6446; GFX1032-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6447; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[3:4]
6448; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
6449; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6450; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0
6451; GFX1032-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:2 row_mask:0xf bank_mask:0xf
6452; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
6453; GFX1032-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6454; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6455; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6456; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0
6457; GFX1032-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:4 row_mask:0xf bank_mask:0xf
6458; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
6459; GFX1032-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6460; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6461; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
6462; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 0
6463; GFX1032-DPP-NEXT:    v_mov_b32_dpp v6, v4 row_xmask:8 row_mask:0xf bank_mask:0xf
6464; GFX1032-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
6465; GFX1032-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6466; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6467; GFX1032-DPP-NEXT:    v_permlanex16_b32 v6, v4, 0, 0
6468; GFX1032-DPP-NEXT:    v_permlanex16_b32 v5, v3, 0, 0
6469; GFX1032-DPP-NEXT:    v_max_f64 v[5:6], v[5:6], v[5:6]
6470; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[5:6]
6471; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
6472; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, v3
6473; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v7, exec_lo, 0
6474; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, 0
6475; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, v4
6476; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v7
6477; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
6478; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB9_2
6479; GFX1032-DPP-NEXT:  ; %bb.1:
6480; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[34:35], 0x24
6481; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6482; GFX1032-DPP-NEXT:    global_atomic_fmax_x2 v2, v[0:1], s[0:1]
6483; GFX1032-DPP-NEXT:  .LBB9_2:
6484; GFX1032-DPP-NEXT:    s_endpgm
6485;
6486; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6487; GFX1164-DPP:       ; %bb.0:
6488; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
6489; GFX1164-DPP-NEXT:    s_mov_b32 s12, s8
6490; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
6491; GFX1164-DPP-NEXT:    s_mov_b32 s13, s9
6492; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
6493; GFX1164-DPP-NEXT:    s_getpc_b64 s[4:5]
6494; GFX1164-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6495; GFX1164-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6496; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
6497; GFX1164-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
6498; GFX1164-DPP-NEXT:    s_mov_b32 s14, s10
6499; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6500; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6501; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6502; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
6503; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6504; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6505; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
6506; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
6507; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, 0
6508; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s[0:1]
6509; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v4, 0, v0, s[0:1]
6510; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6511; GFX1164-DPP-NEXT:    v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf
6512; GFX1164-DPP-NEXT:    v_mov_b32_dpp v2, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
6513; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6514; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6515; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
6516; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
6517; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
6518; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
6519; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6520; GFX1164-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
6521; GFX1164-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:2 row_mask:0xf bank_mask:0xf
6522; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6523; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6524; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6525; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
6526; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
6527; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6528; GFX1164-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
6529; GFX1164-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:4 row_mask:0xf bank_mask:0xf
6530; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6531; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6532; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6533; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
6534; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
6535; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6536; GFX1164-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
6537; GFX1164-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:8 row_mask:0xf bank_mask:0xf
6538; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6539; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6540; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6541; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6542; GFX1164-DPP-NEXT:    v_permlanex16_b32 v5, v3, 0, 0
6543; GFX1164-DPP-NEXT:    v_permlanex16_b32 v4, v2, 0, 0
6544; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6545; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6546; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6547; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6548; GFX1164-DPP-NEXT:    v_permlane64_b32 v5, v3
6549; GFX1164-DPP-NEXT:    v_permlane64_b32 v4, v2
6550; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6551; GFX1164-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6552; GFX1164-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6553; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
6554; GFX1164-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
6555; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6556; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
6557; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
6558; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v6, exec_hi, v0
6559; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6560; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, v2
6561; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, v3
6562; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v6
6563; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB9_3
6564; GFX1164-DPP-NEXT:  ; %bb.1:
6565; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
6566; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
6567; GFX1164-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
6568; GFX1164-DPP-NEXT:    s_mov_b64 s[2:3], 0
6569; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6570; GFX1164-DPP-NEXT:    global_load_b64 v[8:9], v10, s[0:1]
6571; GFX1164-DPP-NEXT:  .LBB9_2: ; %atomicrmw.start
6572; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
6573; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
6574; GFX1164-DPP-NEXT:    v_max_f64 v[6:7], v[8:9], v[8:9]
6575; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6576; GFX1164-DPP-NEXT:    v_max_f64 v[6:7], v[6:7], v[0:1]
6577; GFX1164-DPP-NEXT:    global_atomic_cmpswap_b64 v[6:7], v10, v[6:9], s[0:1] glc
6578; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
6579; GFX1164-DPP-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[8:9]
6580; GFX1164-DPP-NEXT:    v_mov_b32_e32 v9, v7
6581; GFX1164-DPP-NEXT:    v_mov_b32_e32 v8, v6
6582; GFX1164-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
6583; GFX1164-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6584; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[2:3]
6585; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB9_2
6586; GFX1164-DPP-NEXT:  .LBB9_3:
6587; GFX1164-DPP-NEXT:    s_endpgm
6588;
6589; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
6590; GFX1132-DPP:       ; %bb.0:
6591; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
6592; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
6593; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
6594; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
6595; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
6596; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
6597; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
6598; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
6599; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
6600; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
6601; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
6602; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
6603; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
6604; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
6605; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
6606; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6607; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6608; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
6609; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
6610; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
6611; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v4, 0, v0, s0
6612; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6613; GFX1132-DPP-NEXT:    v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf
6614; GFX1132-DPP-NEXT:    v_mov_b32_dpp v2, v4 row_xmask:1 row_mask:0xf bank_mask:0xf
6615; GFX1132-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6616; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6617; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
6618; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
6619; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0
6620; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6621; GFX1132-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf
6622; GFX1132-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:2 row_mask:0xf bank_mask:0xf
6623; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6624; GFX1132-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6625; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6626; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0
6627; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6628; GFX1132-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf
6629; GFX1132-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:4 row_mask:0xf bank_mask:0xf
6630; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6631; GFX1132-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6632; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6633; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0
6634; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6635; GFX1132-DPP-NEXT:    v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf
6636; GFX1132-DPP-NEXT:    v_mov_b32_dpp v4, v2 row_xmask:8 row_mask:0xf bank_mask:0xf
6637; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6638; GFX1132-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6639; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6640; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
6641; GFX1132-DPP-NEXT:    v_permlanex16_b32 v5, v3, 0, 0
6642; GFX1132-DPP-NEXT:    v_permlanex16_b32 v4, v2, 0, 0
6643; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6644; GFX1132-DPP-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
6645; GFX1132-DPP-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
6646; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
6647; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6648; GFX1132-DPP-NEXT:    v_mov_b32_e32 v0, v2
6649; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v6, exec_lo, 0
6650; GFX1132-DPP-NEXT:    v_dual_mov_b32 v10, 0 :: v_dual_mov_b32 v1, v3
6651; GFX1132-DPP-NEXT:    s_mov_b32 s2, 0
6652; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
6653; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
6654; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v6
6655; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB9_3
6656; GFX1132-DPP-NEXT:  ; %bb.1:
6657; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[34:35], 0x24
6658; GFX1132-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
6659; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
6660; GFX1132-DPP-NEXT:    global_load_b64 v[8:9], v10, s[0:1]
6661; GFX1132-DPP-NEXT:  .LBB9_2: ; %atomicrmw.start
6662; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
6663; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
6664; GFX1132-DPP-NEXT:    v_max_f64 v[6:7], v[8:9], v[8:9]
6665; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6666; GFX1132-DPP-NEXT:    v_max_f64 v[6:7], v[6:7], v[0:1]
6667; GFX1132-DPP-NEXT:    global_atomic_cmpswap_b64 v[6:7], v10, v[6:9], s[0:1] glc
6668; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
6669; GFX1132-DPP-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[8:9]
6670; GFX1132-DPP-NEXT:    v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6
6671; GFX1132-DPP-NEXT:    s_or_b32 s2, vcc_lo, s2
6672; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6673; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s2
6674; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB9_2
6675; GFX1132-DPP-NEXT:  .LBB9_3:
6676; GFX1132-DPP-NEXT:    s_endpgm
6677  %divValue = call double @div.double.value()
6678  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1
6679  ret void
6680}
6681
6682define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
6683; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
6684; GFX7LESS:       ; %bb.0:
6685; GFX7LESS-NEXT:    s_movk_i32 s32, 0x800
6686; GFX7LESS-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
6687; GFX7LESS-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
6688; GFX7LESS-NEXT:    s_mov_b32 s50, -1
6689; GFX7LESS-NEXT:    s_mov_b32 s51, 0xe8f000
6690; GFX7LESS-NEXT:    s_add_u32 s48, s48, s11
6691; GFX7LESS-NEXT:    s_addc_u32 s49, s49, 0
6692; GFX7LESS-NEXT:    s_mov_b64 s[40:41], s[0:1]
6693; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
6694; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
6695; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
6696; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
6697; GFX7LESS-NEXT:    s_cbranch_execz .LBB10_3
6698; GFX7LESS-NEXT:  ; %bb.1:
6699; GFX7LESS-NEXT:    s_mov_b32 s33, s10
6700; GFX7LESS-NEXT:    s_mov_b32 s42, s9
6701; GFX7LESS-NEXT:    s_mov_b32 s43, s8
6702; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[6:7]
6703; GFX7LESS-NEXT:    s_mov_b64 s[36:37], s[4:5]
6704; GFX7LESS-NEXT:    s_mov_b64 s[38:39], s[2:3]
6705; GFX7LESS-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
6706; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
6707; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
6708; GFX7LESS-NEXT:    s_mov_b64 s[46:47], 0
6709; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
6710; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
6711; GFX7LESS-NEXT:    v_or_b32_e32 v3, v0, v1
6712; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
6713; GFX7LESS-NEXT:    v_mov_b32_e32 v0, s0
6714; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s1
6715; GFX7LESS-NEXT:    v_or_b32_e32 v40, v3, v2
6716; GFX7LESS-NEXT:  .LBB10_2: ; %atomicrmw.start
6717; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
6718; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
6719; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
6720; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
6721; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0
6722; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
6723; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
6724; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], 4.0
6725; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
6726; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
6727; GFX7LESS-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
6728; GFX7LESS-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
6729; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
6730; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
6731; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
6732; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
6733; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 8
6734; GFX7LESS-NEXT:    v_mov_b32_e32 v1, 0
6735; GFX7LESS-NEXT:    v_mov_b32_e32 v4, 0
6736; GFX7LESS-NEXT:    v_mov_b32_e32 v5, 8
6737; GFX7LESS-NEXT:    v_mov_b32_e32 v6, 0
6738; GFX7LESS-NEXT:    v_mov_b32_e32 v7, 0
6739; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
6740; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[38:39]
6741; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
6742; GFX7LESS-NEXT:    s_mov_b32 s12, s43
6743; GFX7LESS-NEXT:    s_mov_b32 s13, s42
6744; GFX7LESS-NEXT:    s_mov_b32 s14, s33
6745; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
6746; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
6747; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
6748; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s44
6749; GFX7LESS-NEXT:    v_mov_b32_e32 v3, s45
6750; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
6751; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6752; GFX7LESS-NEXT:    v_and_b32_e32 v2, 1, v0
6753; GFX7LESS-NEXT:    buffer_load_dword v0, off, s[48:51], 0
6754; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
6755; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
6756; GFX7LESS-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
6757; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[46:47]
6758; GFX7LESS-NEXT:    s_cbranch_execnz .LBB10_2
6759; GFX7LESS-NEXT:  .LBB10_3:
6760; GFX7LESS-NEXT:    s_endpgm
6761;
6762; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
6763; GFX9:       ; %bb.0:
6764; GFX9-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
6765; GFX9-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
6766; GFX9-NEXT:    s_mov_b32 s50, -1
6767; GFX9-NEXT:    s_mov_b32 s51, 0xe00000
6768; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
6769; GFX9-NEXT:    s_add_u32 s48, s48, s11
6770; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
6771; GFX9-NEXT:    s_addc_u32 s49, s49, 0
6772; GFX9-NEXT:    s_mov_b64 s[40:41], s[0:1]
6773; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
6774; GFX9-NEXT:    s_movk_i32 s32, 0x800
6775; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
6776; GFX9-NEXT:    s_cbranch_execz .LBB10_3
6777; GFX9-NEXT:  ; %bb.1:
6778; GFX9-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
6779; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
6780; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
6781; GFX9-NEXT:    s_mov_b32 s33, s10
6782; GFX9-NEXT:    s_mov_b32 s42, s9
6783; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6784; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
6785; GFX9-NEXT:    s_mov_b32 s43, s8
6786; GFX9-NEXT:    s_mov_b64 s[34:35], s[6:7]
6787; GFX9-NEXT:    s_mov_b64 s[36:37], s[4:5]
6788; GFX9-NEXT:    s_mov_b64 s[38:39], s[2:3]
6789; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6790; GFX9-NEXT:    v_mov_b32_e32 v2, s1
6791; GFX9-NEXT:    s_mov_b64 s[46:47], 0
6792; GFX9-NEXT:    v_mov_b32_e32 v1, s0
6793; GFX9-NEXT:    v_or3_b32 v40, v0, v4, v3
6794; GFX9-NEXT:  .LBB10_2: ; %atomicrmw.start
6795; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6796; GFX9-NEXT:    s_waitcnt vmcnt(0)
6797; GFX9-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
6798; GFX9-NEXT:    s_add_u32 s8, s36, 44
6799; GFX9-NEXT:    s_addc_u32 s9, s37, 0
6800; GFX9-NEXT:    s_getpc_b64 s[0:1]
6801; GFX9-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
6802; GFX9-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
6803; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
6804; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
6805; GFX9-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
6806; GFX9-NEXT:    buffer_store_dword v1, off, s[48:51], 0
6807; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
6808; GFX9-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
6809; GFX9-NEXT:    s_mov_b64 s[6:7], s[38:39]
6810; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
6811; GFX9-NEXT:    s_mov_b32 s12, s43
6812; GFX9-NEXT:    s_mov_b32 s13, s42
6813; GFX9-NEXT:    s_mov_b32 s14, s33
6814; GFX9-NEXT:    v_mov_b32_e32 v31, v40
6815; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
6816; GFX9-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
6817; GFX9-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
6818; GFX9-NEXT:    v_mov_b32_e32 v0, 8
6819; GFX9-NEXT:    v_mov_b32_e32 v1, 0
6820; GFX9-NEXT:    v_mov_b32_e32 v2, s44
6821; GFX9-NEXT:    v_mov_b32_e32 v3, s45
6822; GFX9-NEXT:    v_mov_b32_e32 v4, 0
6823; GFX9-NEXT:    v_mov_b32_e32 v5, 8
6824; GFX9-NEXT:    v_mov_b32_e32 v6, 0
6825; GFX9-NEXT:    v_mov_b32_e32 v7, 0
6826; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6827; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6828; GFX9-NEXT:    buffer_load_dword v1, off, s[48:51], 0
6829; GFX9-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
6830; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
6831; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
6832; GFX9-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
6833; GFX9-NEXT:    s_andn2_b64 exec, exec, s[46:47]
6834; GFX9-NEXT:    s_cbranch_execnz .LBB10_2
6835; GFX9-NEXT:  .LBB10_3:
6836; GFX9-NEXT:    s_endpgm
6837;
6838; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
6839; GFX1064:       ; %bb.0:
6840; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
6841; GFX1064-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
6842; GFX1064-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
6843; GFX1064-NEXT:    s_mov_b32 s50, -1
6844; GFX1064-NEXT:    s_mov_b32 s51, 0x31e16000
6845; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
6846; GFX1064-NEXT:    s_add_u32 s48, s48, s11
6847; GFX1064-NEXT:    s_addc_u32 s49, s49, 0
6848; GFX1064-NEXT:    s_mov_b64 s[40:41], s[0:1]
6849; GFX1064-NEXT:    s_movk_i32 s32, 0x800
6850; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
6851; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
6852; GFX1064-NEXT:    s_cbranch_execz .LBB10_3
6853; GFX1064-NEXT:  ; %bb.1:
6854; GFX1064-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
6855; GFX1064-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
6856; GFX1064-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
6857; GFX1064-NEXT:    s_mov_b32 s33, s10
6858; GFX1064-NEXT:    s_mov_b32 s42, s9
6859; GFX1064-NEXT:    s_mov_b32 s43, s8
6860; GFX1064-NEXT:    s_mov_b64 s[34:35], s[6:7]
6861; GFX1064-NEXT:    v_or3_b32 v40, v0, v4, v3
6862; GFX1064-NEXT:    s_mov_b64 s[36:37], s[4:5]
6863; GFX1064-NEXT:    s_mov_b64 s[38:39], s[2:3]
6864; GFX1064-NEXT:    s_mov_b64 s[46:47], 0
6865; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
6866; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
6867; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
6868; GFX1064-NEXT:    v_mov_b32_e32 v2, s1
6869; GFX1064-NEXT:    v_mov_b32_e32 v1, s0
6870; GFX1064-NEXT:  .LBB10_2: ; %atomicrmw.start
6871; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
6872; GFX1064-NEXT:    s_waitcnt vmcnt(0)
6873; GFX1064-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
6874; GFX1064-NEXT:    s_add_u32 s8, s36, 44
6875; GFX1064-NEXT:    s_addc_u32 s9, s37, 0
6876; GFX1064-NEXT:    s_getpc_b64 s[0:1]
6877; GFX1064-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
6878; GFX1064-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
6879; GFX1064-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
6880; GFX1064-NEXT:    buffer_store_dword v1, off, s[48:51], 0
6881; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
6882; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
6883; GFX1064-NEXT:    v_mov_b32_e32 v0, 8
6884; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
6885; GFX1064-NEXT:    v_mov_b32_e32 v2, s44
6886; GFX1064-NEXT:    v_mov_b32_e32 v5, 8
6887; GFX1064-NEXT:    v_mov_b32_e32 v6, 0
6888; GFX1064-NEXT:    v_mov_b32_e32 v7, 0
6889; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
6890; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
6891; GFX1064-NEXT:    s_mov_b64 s[6:7], s[38:39]
6892; GFX1064-NEXT:    s_mov_b64 s[10:11], s[34:35]
6893; GFX1064-NEXT:    s_mov_b32 s12, s43
6894; GFX1064-NEXT:    s_mov_b32 s13, s42
6895; GFX1064-NEXT:    s_mov_b32 s14, s33
6896; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
6897; GFX1064-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
6898; GFX1064-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
6899; GFX1064-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
6900; GFX1064-NEXT:    v_mov_b32_e32 v3, s45
6901; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
6902; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
6903; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6904; GFX1064-NEXT:    s_clause 0x1
6905; GFX1064-NEXT:    buffer_load_dword v1, off, s[48:51], 0
6906; GFX1064-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
6907; GFX1064-NEXT:    v_and_b32_e32 v0, 1, v0
6908; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
6909; GFX1064-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
6910; GFX1064-NEXT:    s_andn2_b64 exec, exec, s[46:47]
6911; GFX1064-NEXT:    s_cbranch_execnz .LBB10_2
6912; GFX1064-NEXT:  .LBB10_3:
6913; GFX1064-NEXT:    s_endpgm
6914;
6915; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
6916; GFX1032:       ; %bb.0:
6917; GFX1032-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
6918; GFX1032-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
6919; GFX1032-NEXT:    s_mov_b32 s50, -1
6920; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
6921; GFX1032-NEXT:    s_mov_b32 s51, 0x31c16000
6922; GFX1032-NEXT:    s_add_u32 s48, s48, s11
6923; GFX1032-NEXT:    s_addc_u32 s49, s49, 0
6924; GFX1032-NEXT:    s_mov_b64 s[40:41], s[0:1]
6925; GFX1032-NEXT:    s_mov_b32 s46, 0
6926; GFX1032-NEXT:    s_movk_i32 s32, 0x400
6927; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
6928; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
6929; GFX1032-NEXT:    s_cbranch_execz .LBB10_3
6930; GFX1032-NEXT:  ; %bb.1:
6931; GFX1032-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
6932; GFX1032-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
6933; GFX1032-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
6934; GFX1032-NEXT:    s_mov_b32 s33, s10
6935; GFX1032-NEXT:    s_mov_b32 s42, s9
6936; GFX1032-NEXT:    s_mov_b32 s43, s8
6937; GFX1032-NEXT:    s_mov_b64 s[34:35], s[6:7]
6938; GFX1032-NEXT:    v_or3_b32 v40, v0, v4, v3
6939; GFX1032-NEXT:    s_mov_b64 s[36:37], s[4:5]
6940; GFX1032-NEXT:    s_mov_b64 s[38:39], s[2:3]
6941; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
6942; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
6943; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
6944; GFX1032-NEXT:    v_mov_b32_e32 v2, s1
6945; GFX1032-NEXT:    v_mov_b32_e32 v1, s0
6946; GFX1032-NEXT:  .LBB10_2: ; %atomicrmw.start
6947; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
6948; GFX1032-NEXT:    s_waitcnt vmcnt(0)
6949; GFX1032-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
6950; GFX1032-NEXT:    s_add_u32 s8, s36, 44
6951; GFX1032-NEXT:    s_addc_u32 s9, s37, 0
6952; GFX1032-NEXT:    s_getpc_b64 s[0:1]
6953; GFX1032-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
6954; GFX1032-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
6955; GFX1032-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
6956; GFX1032-NEXT:    buffer_store_dword v1, off, s[48:51], 0
6957; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
6958; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
6959; GFX1032-NEXT:    v_mov_b32_e32 v0, 8
6960; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
6961; GFX1032-NEXT:    v_mov_b32_e32 v2, s44
6962; GFX1032-NEXT:    v_mov_b32_e32 v5, 8
6963; GFX1032-NEXT:    v_mov_b32_e32 v6, 0
6964; GFX1032-NEXT:    v_mov_b32_e32 v7, 0
6965; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
6966; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
6967; GFX1032-NEXT:    s_mov_b64 s[6:7], s[38:39]
6968; GFX1032-NEXT:    s_mov_b64 s[10:11], s[34:35]
6969; GFX1032-NEXT:    s_mov_b32 s12, s43
6970; GFX1032-NEXT:    s_mov_b32 s13, s42
6971; GFX1032-NEXT:    s_mov_b32 s14, s33
6972; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
6973; GFX1032-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
6974; GFX1032-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
6975; GFX1032-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
6976; GFX1032-NEXT:    v_mov_b32_e32 v3, s45
6977; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
6978; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
6979; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
6980; GFX1032-NEXT:    s_clause 0x1
6981; GFX1032-NEXT:    buffer_load_dword v1, off, s[48:51], 0
6982; GFX1032-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
6983; GFX1032-NEXT:    v_and_b32_e32 v0, 1, v0
6984; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
6985; GFX1032-NEXT:    s_or_b32 s46, vcc_lo, s46
6986; GFX1032-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
6987; GFX1032-NEXT:    s_cbranch_execnz .LBB10_2
6988; GFX1032-NEXT:  .LBB10_3:
6989; GFX1032-NEXT:    s_endpgm
6990;
6991; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
6992; GFX1164:       ; %bb.0:
6993; GFX1164-NEXT:    v_mov_b32_e32 v40, v0
6994; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
6995; GFX1164-NEXT:    s_mov_b64 s[40:41], s[0:1]
6996; GFX1164-NEXT:    s_mov_b32 s32, 32
6997; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
6998; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6999; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
7000; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
7001; GFX1164-NEXT:    s_cbranch_execz .LBB10_3
7002; GFX1164-NEXT:  ; %bb.1:
7003; GFX1164-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
7004; GFX1164-NEXT:    s_mov_b32 s33, s10
7005; GFX1164-NEXT:    s_mov_b32 s42, s9
7006; GFX1164-NEXT:    s_mov_b32 s43, s8
7007; GFX1164-NEXT:    s_mov_b64 s[34:35], s[6:7]
7008; GFX1164-NEXT:    s_mov_b64 s[36:37], s[4:5]
7009; GFX1164-NEXT:    s_mov_b64 s[38:39], s[2:3]
7010; GFX1164-NEXT:    s_mov_b64 s[46:47], 0
7011; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
7012; GFX1164-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
7013; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
7014; GFX1164-NEXT:    v_mov_b32_e32 v2, s1
7015; GFX1164-NEXT:    v_mov_b32_e32 v1, s0
7016; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x1
7017; GFX1164-NEXT:    .p2align 6
7018; GFX1164-NEXT:  .LBB10_2: ; %atomicrmw.start
7019; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
7020; GFX1164-NEXT:    s_waitcnt vmcnt(0)
7021; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7022; GFX1164-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7023; GFX1164-NEXT:    s_add_u32 s8, s36, 44
7024; GFX1164-NEXT:    s_addc_u32 s9, s37, 0
7025; GFX1164-NEXT:    s_getpc_b64 s[0:1]
7026; GFX1164-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7027; GFX1164-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7028; GFX1164-NEXT:    v_mov_b32_e32 v31, v40
7029; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
7030; GFX1164-NEXT:    v_mov_b32_e32 v0, 8
7031; GFX1164-NEXT:    v_mov_b32_e32 v5, 8
7032; GFX1164-NEXT:    v_mov_b32_e32 v6, 0
7033; GFX1164-NEXT:    v_mov_b32_e32 v7, 0
7034; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
7035; GFX1164-NEXT:    s_mov_b64 s[6:7], s[38:39]
7036; GFX1164-NEXT:    s_mov_b64 s[10:11], s[34:35]
7037; GFX1164-NEXT:    s_mov_b32 s12, s43
7038; GFX1164-NEXT:    s_mov_b32 s13, s42
7039; GFX1164-NEXT:    s_mov_b32 s14, s33
7040; GFX1164-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7041; GFX1164-NEXT:    scratch_store_b64 off, v[1:2], off
7042; GFX1164-NEXT:    v_mov_b32_e32 v1, 0
7043; GFX1164-NEXT:    v_mov_b32_e32 v2, s44
7044; GFX1164-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
7045; GFX1164-NEXT:    v_mov_b32_e32 v3, s45
7046; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
7047; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
7048; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7049; GFX1164-NEXT:    scratch_load_b64 v[1:2], off, off
7050; GFX1164-NEXT:    v_and_b32_e32 v0, 1, v0
7051; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
7052; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7053; GFX1164-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7054; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
7055; GFX1164-NEXT:    s_cbranch_execnz .LBB10_2
7056; GFX1164-NEXT:  .LBB10_3:
7057; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x2
7058; GFX1164-NEXT:    s_endpgm
7059;
7060; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7061; GFX1132:       ; %bb.0:
7062; GFX1132-NEXT:    v_mov_b32_e32 v40, v0
7063; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7064; GFX1132-NEXT:    s_mov_b64 s[40:41], s[0:1]
7065; GFX1132-NEXT:    s_mov_b32 s46, 0
7066; GFX1132-NEXT:    s_mov_b32 s32, 32
7067; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
7068; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
7069; GFX1132-NEXT:    s_cbranch_execz .LBB10_3
7070; GFX1132-NEXT:  ; %bb.1:
7071; GFX1132-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
7072; GFX1132-NEXT:    s_mov_b32 s33, s15
7073; GFX1132-NEXT:    s_mov_b32 s42, s14
7074; GFX1132-NEXT:    s_mov_b32 s43, s13
7075; GFX1132-NEXT:    s_mov_b64 s[34:35], s[6:7]
7076; GFX1132-NEXT:    s_mov_b64 s[36:37], s[4:5]
7077; GFX1132-NEXT:    s_mov_b64 s[38:39], s[2:3]
7078; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
7079; GFX1132-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
7080; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
7081; GFX1132-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
7082; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x1
7083; GFX1132-NEXT:    .p2align 6
7084; GFX1132-NEXT:  .LBB10_2: ; %atomicrmw.start
7085; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
7086; GFX1132-NEXT:    s_waitcnt vmcnt(0)
7087; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7088; GFX1132-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7089; GFX1132-NEXT:    s_add_u32 s8, s36, 44
7090; GFX1132-NEXT:    s_addc_u32 s9, s37, 0
7091; GFX1132-NEXT:    s_getpc_b64 s[0:1]
7092; GFX1132-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7093; GFX1132-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7094; GFX1132-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
7095; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
7096; GFX1132-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
7097; GFX1132-NEXT:    v_mov_b32_e32 v7, 0
7098; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
7099; GFX1132-NEXT:    s_mov_b64 s[6:7], s[38:39]
7100; GFX1132-NEXT:    s_mov_b64 s[10:11], s[34:35]
7101; GFX1132-NEXT:    s_mov_b32 s12, s43
7102; GFX1132-NEXT:    s_mov_b32 s13, s42
7103; GFX1132-NEXT:    s_mov_b32 s14, s33
7104; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_4)
7105; GFX1132-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7106; GFX1132-NEXT:    scratch_store_b64 off, v[1:2], off
7107; GFX1132-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
7108; GFX1132-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
7109; GFX1132-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
7110; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
7111; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7112; GFX1132-NEXT:    scratch_load_b64 v[1:2], off, off
7113; GFX1132-NEXT:    v_and_b32_e32 v0, 1, v0
7114; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
7115; GFX1132-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
7116; GFX1132-NEXT:    s_or_b32 s46, vcc_lo, s46
7117; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
7118; GFX1132-NEXT:    s_cbranch_execnz .LBB10_2
7119; GFX1132-NEXT:  .LBB10_3:
7120; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x2
7121; GFX1132-NEXT:    s_endpgm
7122;
7123; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7124; GFX7LESS-DPP:       ; %bb.0:
7125; GFX7LESS-DPP-NEXT:    s_movk_i32 s32, 0x800
7126; GFX7LESS-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7127; GFX7LESS-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7128; GFX7LESS-DPP-NEXT:    s_mov_b32 s50, -1
7129; GFX7LESS-DPP-NEXT:    s_mov_b32 s51, 0xe8f000
7130; GFX7LESS-DPP-NEXT:    s_add_u32 s48, s48, s11
7131; GFX7LESS-DPP-NEXT:    s_addc_u32 s49, s49, 0
7132; GFX7LESS-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7133; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
7134; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
7135; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
7136; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7137; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB10_3
7138; GFX7LESS-DPP-NEXT:  ; %bb.1:
7139; GFX7LESS-DPP-NEXT:    s_mov_b32 s33, s10
7140; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, s9
7141; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, s8
7142; GFX7LESS-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7143; GFX7LESS-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7144; GFX7LESS-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7145; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
7146; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7147; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
7148; GFX7LESS-DPP-NEXT:    s_mov_b64 s[46:47], 0
7149; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
7150; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
7151; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v3, v0, v1
7152; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7153; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, s0
7154; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s1
7155; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v40, v3, v2
7156; GFX7LESS-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7157; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7158; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
7159; GFX7LESS-DPP-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
7160; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
7161; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0
7162; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
7163; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
7164; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], 4.0
7165; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
7166; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
7167; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7168; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7169; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
7170; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
7171; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7172; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
7173; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, 8
7174; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, 0
7175; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, 0
7176; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v5, 8
7177; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, 0
7178; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, 0
7179; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7180; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7181; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7182; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
7183; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
7184; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
7185; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
7186; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
7187; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
7188; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, s44
7189; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, s45
7190; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7191; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7192; GFX7LESS-DPP-NEXT:    v_and_b32_e32 v2, 1, v0
7193; GFX7LESS-DPP-NEXT:    buffer_load_dword v0, off, s[48:51], 0
7194; GFX7LESS-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
7195; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
7196; GFX7LESS-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7197; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7198; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7199; GFX7LESS-DPP-NEXT:  .LBB10_3:
7200; GFX7LESS-DPP-NEXT:    s_endpgm
7201;
7202; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7203; GFX9-DPP:       ; %bb.0:
7204; GFX9-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7205; GFX9-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7206; GFX9-DPP-NEXT:    s_mov_b32 s50, -1
7207; GFX9-DPP-NEXT:    s_mov_b32 s51, 0xe00000
7208; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
7209; GFX9-DPP-NEXT:    s_add_u32 s48, s48, s11
7210; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
7211; GFX9-DPP-NEXT:    s_addc_u32 s49, s49, 0
7212; GFX9-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7213; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
7214; GFX9-DPP-NEXT:    s_movk_i32 s32, 0x800
7215; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7216; GFX9-DPP-NEXT:    s_cbranch_execz .LBB10_3
7217; GFX9-DPP-NEXT:  ; %bb.1:
7218; GFX9-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
7219; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
7220; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
7221; GFX9-DPP-NEXT:    s_mov_b32 s33, s10
7222; GFX9-DPP-NEXT:    s_mov_b32 s42, s9
7223; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7224; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
7225; GFX9-DPP-NEXT:    s_mov_b32 s43, s8
7226; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7227; GFX9-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7228; GFX9-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7229; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7230; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s1
7231; GFX9-DPP-NEXT:    s_mov_b64 s[46:47], 0
7232; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s0
7233; GFX9-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
7234; GFX9-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7235; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7236; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
7237; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7238; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
7239; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
7240; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
7241; GFX9-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7242; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7243; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7244; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
7245; GFX9-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
7246; GFX9-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
7247; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7248; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7249; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7250; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7251; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
7252; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
7253; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
7254; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
7255; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
7256; GFX9-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
7257; GFX9-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
7258; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 8
7259; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, 0
7260; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s44
7261; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, s45
7262; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0
7263; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 8
7264; GFX9-DPP-NEXT:    v_mov_b32_e32 v6, 0
7265; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
7266; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7267; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7268; GFX9-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
7269; GFX9-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
7270; GFX9-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
7271; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7272; GFX9-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7273; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7274; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7275; GFX9-DPP-NEXT:  .LBB10_3:
7276; GFX9-DPP-NEXT:    s_endpgm
7277;
7278; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7279; GFX1064-DPP:       ; %bb.0:
7280; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
7281; GFX1064-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7282; GFX1064-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7283; GFX1064-DPP-NEXT:    s_mov_b32 s50, -1
7284; GFX1064-DPP-NEXT:    s_mov_b32 s51, 0x31e16000
7285; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
7286; GFX1064-DPP-NEXT:    s_add_u32 s48, s48, s11
7287; GFX1064-DPP-NEXT:    s_addc_u32 s49, s49, 0
7288; GFX1064-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7289; GFX1064-DPP-NEXT:    s_movk_i32 s32, 0x800
7290; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
7291; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7292; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB10_3
7293; GFX1064-DPP-NEXT:  ; %bb.1:
7294; GFX1064-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
7295; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
7296; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
7297; GFX1064-DPP-NEXT:    s_mov_b32 s33, s10
7298; GFX1064-DPP-NEXT:    s_mov_b32 s42, s9
7299; GFX1064-DPP-NEXT:    s_mov_b32 s43, s8
7300; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7301; GFX1064-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
7302; GFX1064-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7303; GFX1064-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7304; GFX1064-DPP-NEXT:    s_mov_b64 s[46:47], 0
7305; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7306; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
7307; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7308; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s1
7309; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, s0
7310; GFX1064-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7311; GFX1064-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7312; GFX1064-DPP-NEXT:    s_waitcnt vmcnt(0)
7313; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7314; GFX1064-DPP-NEXT:    s_add_u32 s8, s36, 44
7315; GFX1064-DPP-NEXT:    s_addc_u32 s9, s37, 0
7316; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
7317; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7318; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7319; GFX1064-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
7320; GFX1064-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
7321; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7322; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
7323; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 8
7324; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
7325; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s44
7326; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 8
7327; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0
7328; GFX1064-DPP-NEXT:    v_mov_b32_e32 v7, 0
7329; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
7330; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7331; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7332; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7333; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
7334; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
7335; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
7336; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
7337; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7338; GFX1064-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
7339; GFX1064-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
7340; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, s45
7341; GFX1064-DPP-NEXT:    v_mov_b32_e32 v4, 0
7342; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7343; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7344; GFX1064-DPP-NEXT:    s_clause 0x1
7345; GFX1064-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
7346; GFX1064-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
7347; GFX1064-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
7348; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7349; GFX1064-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7350; GFX1064-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7351; GFX1064-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7352; GFX1064-DPP-NEXT:  .LBB10_3:
7353; GFX1064-DPP-NEXT:    s_endpgm
7354;
7355; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7356; GFX1032-DPP:       ; %bb.0:
7357; GFX1032-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7358; GFX1032-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7359; GFX1032-DPP-NEXT:    s_mov_b32 s50, -1
7360; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
7361; GFX1032-DPP-NEXT:    s_mov_b32 s51, 0x31c16000
7362; GFX1032-DPP-NEXT:    s_add_u32 s48, s48, s11
7363; GFX1032-DPP-NEXT:    s_addc_u32 s49, s49, 0
7364; GFX1032-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7365; GFX1032-DPP-NEXT:    s_mov_b32 s46, 0
7366; GFX1032-DPP-NEXT:    s_movk_i32 s32, 0x400
7367; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
7368; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
7369; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB10_3
7370; GFX1032-DPP-NEXT:  ; %bb.1:
7371; GFX1032-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x24
7372; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v3, 20, v2
7373; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v4, 10, v1
7374; GFX1032-DPP-NEXT:    s_mov_b32 s33, s10
7375; GFX1032-DPP-NEXT:    s_mov_b32 s42, s9
7376; GFX1032-DPP-NEXT:    s_mov_b32 s43, s8
7377; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7378; GFX1032-DPP-NEXT:    v_or3_b32 v40, v0, v4, v3
7379; GFX1032-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7380; GFX1032-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7381; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7382; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[44:45], 0x0
7383; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7384; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s1
7385; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, s0
7386; GFX1032-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7387; GFX1032-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7388; GFX1032-DPP-NEXT:    s_waitcnt vmcnt(0)
7389; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7390; GFX1032-DPP-NEXT:    s_add_u32 s8, s36, 44
7391; GFX1032-DPP-NEXT:    s_addc_u32 s9, s37, 0
7392; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
7393; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7394; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7395; GFX1032-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
7396; GFX1032-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
7397; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7398; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
7399; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 8
7400; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
7401; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s44
7402; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 8
7403; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0
7404; GFX1032-DPP-NEXT:    v_mov_b32_e32 v7, 0
7405; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
7406; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7407; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7408; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7409; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
7410; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
7411; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
7412; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
7413; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7414; GFX1032-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
7415; GFX1032-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
7416; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, s45
7417; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, 0
7418; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7419; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7420; GFX1032-DPP-NEXT:    s_clause 0x1
7421; GFX1032-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
7422; GFX1032-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
7423; GFX1032-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
7424; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
7425; GFX1032-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
7426; GFX1032-DPP-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
7427; GFX1032-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7428; GFX1032-DPP-NEXT:  .LBB10_3:
7429; GFX1032-DPP-NEXT:    s_endpgm
7430;
7431; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7432; GFX1164-DPP:       ; %bb.0:
7433; GFX1164-DPP-NEXT:    v_mov_b32_e32 v40, v0
7434; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7435; GFX1164-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7436; GFX1164-DPP-NEXT:    s_mov_b32 s32, 32
7437; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
7438; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7439; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
7440; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
7441; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB10_3
7442; GFX1164-DPP-NEXT:  ; %bb.1:
7443; GFX1164-DPP-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
7444; GFX1164-DPP-NEXT:    s_mov_b32 s33, s10
7445; GFX1164-DPP-NEXT:    s_mov_b32 s42, s9
7446; GFX1164-DPP-NEXT:    s_mov_b32 s43, s8
7447; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7448; GFX1164-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7449; GFX1164-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7450; GFX1164-DPP-NEXT:    s_mov_b64 s[46:47], 0
7451; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7452; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
7453; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7454; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s1
7455; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, s0
7456; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
7457; GFX1164-DPP-NEXT:    .p2align 6
7458; GFX1164-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7459; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7460; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
7461; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7462; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7463; GFX1164-DPP-NEXT:    s_add_u32 s8, s36, 44
7464; GFX1164-DPP-NEXT:    s_addc_u32 s9, s37, 0
7465; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
7466; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7467; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7468; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v40
7469; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
7470; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 8
7471; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 8
7472; GFX1164-DPP-NEXT:    v_mov_b32_e32 v6, 0
7473; GFX1164-DPP-NEXT:    v_mov_b32_e32 v7, 0
7474; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7475; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7476; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7477; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
7478; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
7479; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
7480; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7481; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
7482; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0
7483; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s44
7484; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
7485; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, s45
7486; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
7487; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7488; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7489; GFX1164-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
7490; GFX1164-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
7491; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
7492; GFX1164-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7493; GFX1164-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7494; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
7495; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7496; GFX1164-DPP-NEXT:  .LBB10_3:
7497; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
7498; GFX1164-DPP-NEXT:    s_endpgm
7499;
7500; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
7501; GFX1132-DPP:       ; %bb.0:
7502; GFX1132-DPP-NEXT:    v_mov_b32_e32 v40, v0
7503; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7504; GFX1132-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
7505; GFX1132-DPP-NEXT:    s_mov_b32 s46, 0
7506; GFX1132-DPP-NEXT:    s_mov_b32 s32, 32
7507; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
7508; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
7509; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB10_3
7510; GFX1132-DPP-NEXT:  ; %bb.1:
7511; GFX1132-DPP-NEXT:    s_load_b64 s[44:45], s[4:5], 0x24
7512; GFX1132-DPP-NEXT:    s_mov_b32 s33, s15
7513; GFX1132-DPP-NEXT:    s_mov_b32 s42, s14
7514; GFX1132-DPP-NEXT:    s_mov_b32 s43, s13
7515; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
7516; GFX1132-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
7517; GFX1132-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
7518; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7519; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[44:45], 0x0
7520; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7521; GFX1132-DPP-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
7522; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
7523; GFX1132-DPP-NEXT:    .p2align 6
7524; GFX1132-DPP-NEXT:  .LBB10_2: ; %atomicrmw.start
7525; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
7526; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
7527; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7528; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
7529; GFX1132-DPP-NEXT:    s_add_u32 s8, s36, 44
7530; GFX1132-DPP-NEXT:    s_addc_u32 s9, s37, 0
7531; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
7532; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7533; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7534; GFX1132-DPP-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
7535; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
7536; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
7537; GFX1132-DPP-NEXT:    v_mov_b32_e32 v7, 0
7538; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
7539; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
7540; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
7541; GFX1132-DPP-NEXT:    s_mov_b32 s12, s43
7542; GFX1132-DPP-NEXT:    s_mov_b32 s13, s42
7543; GFX1132-DPP-NEXT:    s_mov_b32 s14, s33
7544; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_4)
7545; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], 4.0
7546; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
7547; GFX1132-DPP-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
7548; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
7549; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
7550; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
7551; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7552; GFX1132-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
7553; GFX1132-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
7554; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
7555; GFX1132-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
7556; GFX1132-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
7557; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
7558; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB10_2
7559; GFX1132-DPP-NEXT:  .LBB10_3:
7560; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
7561; GFX1132-DPP-NEXT:    s_endpgm
7562  %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1
7563  ret void
7564}
7565
7566define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
7567; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
7568; GFX7LESS:       ; %bb.0:
7569; GFX7LESS-NEXT:    s_movk_i32 s32, 0x800
7570; GFX7LESS-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7571; GFX7LESS-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7572; GFX7LESS-NEXT:    s_mov_b32 s50, -1
7573; GFX7LESS-NEXT:    s_mov_b32 s51, 0xe8f000
7574; GFX7LESS-NEXT:    s_add_u32 s48, s48, s11
7575; GFX7LESS-NEXT:    s_addc_u32 s49, s49, 0
7576; GFX7LESS-NEXT:    s_mov_b32 s33, s10
7577; GFX7LESS-NEXT:    s_mov_b32 s42, s9
7578; GFX7LESS-NEXT:    s_mov_b32 s43, s8
7579; GFX7LESS-NEXT:    s_mov_b64 s[34:35], s[6:7]
7580; GFX7LESS-NEXT:    s_mov_b64 s[36:37], s[4:5]
7581; GFX7LESS-NEXT:    s_mov_b64 s[38:39], s[2:3]
7582; GFX7LESS-NEXT:    s_mov_b64 s[40:41], s[0:1]
7583; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
7584; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
7585; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
7586; GFX7LESS-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
7587; GFX7LESS-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
7588; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7589; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
7590; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
7591; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
7592; GFX7LESS-NEXT:    v_or_b32_e32 v40, v0, v2
7593; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
7594; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
7595; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
7596; GFX7LESS-NEXT:    s_mov_b32 s12, s43
7597; GFX7LESS-NEXT:    s_mov_b32 s13, s42
7598; GFX7LESS-NEXT:    s_mov_b32 s14, s33
7599; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
7600; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
7601; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
7602; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
7603; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7604; GFX7LESS-NEXT:    s_mov_b64 s[0:1], exec
7605; GFX7LESS-NEXT:    v_mov_b32_e32 v2, 0
7606; GFX7LESS-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
7607; GFX7LESS-NEXT:  .LBB11_1: ; %ComputeLoop
7608; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
7609; GFX7LESS-NEXT:    s_ff1_i32_b64 s4, s[0:1]
7610; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
7611; GFX7LESS-NEXT:    v_readlane_b32 s3, v1, s4
7612; GFX7LESS-NEXT:    v_readlane_b32 s2, v0, s4
7613; GFX7LESS-NEXT:    s_lshl_b64 s[4:5], 1, s4
7614; GFX7LESS-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
7615; GFX7LESS-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
7616; GFX7LESS-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[0:1], 0
7617; GFX7LESS-NEXT:    s_and_b64 vcc, exec, s[2:3]
7618; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
7619; GFX7LESS-NEXT:    s_cbranch_vccnz .LBB11_1
7620; GFX7LESS-NEXT:  ; %bb.2: ; %ComputeEnd
7621; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
7622; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
7623; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
7624; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7625; GFX7LESS-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
7626; GFX7LESS-NEXT:    s_cbranch_execz .LBB11_5
7627; GFX7LESS-NEXT:  ; %bb.3:
7628; GFX7LESS-NEXT:    s_load_dwordx2 s[44:45], s[36:37], 0x9
7629; GFX7LESS-NEXT:    s_mov_b32 s47, 0xf000
7630; GFX7LESS-NEXT:    s_mov_b32 s46, -1
7631; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
7632; GFX7LESS-NEXT:    buffer_load_dwordx2 v[0:1], off, s[44:47], 0
7633; GFX7LESS-NEXT:    s_mov_b64 s[46:47], 0
7634; GFX7LESS-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
7635; GFX7LESS-NEXT:  .LBB11_4: ; %atomicrmw.start
7636; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
7637; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
7638; GFX7LESS-NEXT:    v_max_f64 v[2:3], v[0:1], v[0:1]
7639; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:4
7640; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0
7641; GFX7LESS-NEXT:    s_add_u32 s8, s36, 44
7642; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
7643; GFX7LESS-NEXT:    v_max_f64 v[0:1], v[2:3], v[41:42]
7644; GFX7LESS-NEXT:    s_addc_u32 s9, s37, 0
7645; GFX7LESS-NEXT:    s_getpc_b64 s[0:1]
7646; GFX7LESS-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7647; GFX7LESS-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7648; GFX7LESS-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
7649; GFX7LESS-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
7650; GFX7LESS-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7651; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
7652; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 8
7653; GFX7LESS-NEXT:    v_mov_b32_e32 v1, 0
7654; GFX7LESS-NEXT:    v_mov_b32_e32 v4, 0
7655; GFX7LESS-NEXT:    v_mov_b32_e32 v5, 8
7656; GFX7LESS-NEXT:    v_mov_b32_e32 v6, 0
7657; GFX7LESS-NEXT:    v_mov_b32_e32 v7, 0
7658; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[40:41]
7659; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[38:39]
7660; GFX7LESS-NEXT:    s_mov_b64 s[10:11], s[34:35]
7661; GFX7LESS-NEXT:    s_mov_b32 s12, s43
7662; GFX7LESS-NEXT:    s_mov_b32 s13, s42
7663; GFX7LESS-NEXT:    s_mov_b32 s14, s33
7664; GFX7LESS-NEXT:    v_mov_b32_e32 v31, v40
7665; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[48:49]
7666; GFX7LESS-NEXT:    s_mov_b64 s[2:3], s[50:51]
7667; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s44
7668; GFX7LESS-NEXT:    v_mov_b32_e32 v3, s45
7669; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
7670; GFX7LESS-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7671; GFX7LESS-NEXT:    v_and_b32_e32 v2, 1, v0
7672; GFX7LESS-NEXT:    buffer_load_dword v0, off, s[48:51], 0
7673; GFX7LESS-NEXT:    buffer_load_dword v1, off, s[48:51], 0 offset:4
7674; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
7675; GFX7LESS-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7676; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7677; GFX7LESS-NEXT:    s_cbranch_execnz .LBB11_4
7678; GFX7LESS-NEXT:  .LBB11_5:
7679; GFX7LESS-NEXT:    s_endpgm
7680;
7681; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
7682; GFX9:       ; %bb.0:
7683; GFX9-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7684; GFX9-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7685; GFX9-NEXT:    s_mov_b32 s50, -1
7686; GFX9-NEXT:    s_mov_b32 s51, 0xe00000
7687; GFX9-NEXT:    s_add_u32 s48, s48, s11
7688; GFX9-NEXT:    s_addc_u32 s49, s49, 0
7689; GFX9-NEXT:    s_mov_b64 s[36:37], s[4:5]
7690; GFX9-NEXT:    s_mov_b32 s43, s8
7691; GFX9-NEXT:    s_add_u32 s8, s36, 44
7692; GFX9-NEXT:    s_mov_b32 s42, s9
7693; GFX9-NEXT:    s_addc_u32 s9, s37, 0
7694; GFX9-NEXT:    s_mov_b64 s[40:41], s[0:1]
7695; GFX9-NEXT:    s_getpc_b64 s[0:1]
7696; GFX9-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
7697; GFX9-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
7698; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7699; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
7700; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
7701; GFX9-NEXT:    s_mov_b32 s33, s10
7702; GFX9-NEXT:    s_mov_b64 s[34:35], s[6:7]
7703; GFX9-NEXT:    s_mov_b64 s[38:39], s[2:3]
7704; GFX9-NEXT:    v_or3_b32 v40, v0, v1, v2
7705; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
7706; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
7707; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
7708; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
7709; GFX9-NEXT:    s_mov_b32 s12, s43
7710; GFX9-NEXT:    s_mov_b32 s13, s42
7711; GFX9-NEXT:    s_mov_b32 s14, s33
7712; GFX9-NEXT:    v_mov_b32_e32 v31, v40
7713; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
7714; GFX9-NEXT:    s_movk_i32 s32, 0x800
7715; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7716; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7717; GFX9-NEXT:    v_mov_b32_e32 v2, 0
7718; GFX9-NEXT:    s_mov_b64 s[0:1], exec
7719; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
7720; GFX9-NEXT:  .LBB11_1: ; %ComputeLoop
7721; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7722; GFX9-NEXT:    s_ff1_i32_b64 s4, s[0:1]
7723; GFX9-NEXT:    v_readlane_b32 s3, v1, s4
7724; GFX9-NEXT:    v_readlane_b32 s2, v0, s4
7725; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
7726; GFX9-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
7727; GFX9-NEXT:    s_lshl_b64 s[2:3], 1, s4
7728; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
7729; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
7730; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
7731; GFX9-NEXT:    s_cbranch_scc1 .LBB11_1
7732; GFX9-NEXT:  ; %bb.2: ; %ComputeEnd
7733; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7734; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
7735; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
7736; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7737; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
7738; GFX9-NEXT:    s_cbranch_execz .LBB11_5
7739; GFX9-NEXT:  ; %bb.3:
7740; GFX9-NEXT:    s_load_dwordx2 s[44:45], s[36:37], 0x24
7741; GFX9-NEXT:    v_mov_b32_e32 v0, 0
7742; GFX9-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
7743; GFX9-NEXT:    s_mov_b64 s[46:47], 0
7744; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7745; GFX9-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
7746; GFX9-NEXT:  .LBB11_4: ; %atomicrmw.start
7747; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7748; GFX9-NEXT:    s_waitcnt vmcnt(0)
7749; GFX9-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
7750; GFX9-NEXT:    s_add_u32 s8, s36, 44
7751; GFX9-NEXT:    s_addc_u32 s9, s37, 0
7752; GFX9-NEXT:    s_getpc_b64 s[0:1]
7753; GFX9-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7754; GFX9-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7755; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7756; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
7757; GFX9-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
7758; GFX9-NEXT:    buffer_store_dword v4, off, s[48:51], 0
7759; GFX9-NEXT:    s_mov_b64 s[4:5], s[40:41]
7760; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
7761; GFX9-NEXT:    s_mov_b64 s[6:7], s[38:39]
7762; GFX9-NEXT:    s_mov_b64 s[10:11], s[34:35]
7763; GFX9-NEXT:    s_mov_b32 s12, s43
7764; GFX9-NEXT:    s_mov_b32 s13, s42
7765; GFX9-NEXT:    s_mov_b32 s14, s33
7766; GFX9-NEXT:    v_mov_b32_e32 v31, v40
7767; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
7768; GFX9-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
7769; GFX9-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
7770; GFX9-NEXT:    v_mov_b32_e32 v0, 8
7771; GFX9-NEXT:    v_mov_b32_e32 v1, 0
7772; GFX9-NEXT:    v_mov_b32_e32 v2, s44
7773; GFX9-NEXT:    v_mov_b32_e32 v3, s45
7774; GFX9-NEXT:    v_mov_b32_e32 v4, 0
7775; GFX9-NEXT:    v_mov_b32_e32 v5, 8
7776; GFX9-NEXT:    v_mov_b32_e32 v6, 0
7777; GFX9-NEXT:    v_mov_b32_e32 v7, 0
7778; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7779; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7780; GFX9-NEXT:    buffer_load_dword v4, off, s[48:51], 0
7781; GFX9-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
7782; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
7783; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7784; GFX9-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7785; GFX9-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7786; GFX9-NEXT:    s_cbranch_execnz .LBB11_4
7787; GFX9-NEXT:  .LBB11_5:
7788; GFX9-NEXT:    s_endpgm
7789;
7790; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
7791; GFX1064:       ; %bb.0:
7792; GFX1064-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7793; GFX1064-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7794; GFX1064-NEXT:    s_mov_b32 s50, -1
7795; GFX1064-NEXT:    s_mov_b32 s51, 0x31e16000
7796; GFX1064-NEXT:    s_add_u32 s48, s48, s11
7797; GFX1064-NEXT:    s_mov_b64 s[34:35], s[4:5]
7798; GFX1064-NEXT:    s_addc_u32 s49, s49, 0
7799; GFX1064-NEXT:    s_mov_b32 s43, s8
7800; GFX1064-NEXT:    s_add_u32 s8, s34, 44
7801; GFX1064-NEXT:    s_mov_b32 s42, s9
7802; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
7803; GFX1064-NEXT:    s_mov_b64 s[40:41], s[0:1]
7804; GFX1064-NEXT:    s_getpc_b64 s[0:1]
7805; GFX1064-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
7806; GFX1064-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
7807; GFX1064-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
7808; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7809; GFX1064-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
7810; GFX1064-NEXT:    s_mov_b32 s33, s10
7811; GFX1064-NEXT:    s_mov_b64 s[36:37], s[6:7]
7812; GFX1064-NEXT:    s_mov_b64 s[38:39], s[2:3]
7813; GFX1064-NEXT:    s_mov_b64 s[6:7], s[2:3]
7814; GFX1064-NEXT:    v_or3_b32 v40, v0, v1, v2
7815; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
7816; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
7817; GFX1064-NEXT:    s_mov_b64 s[10:11], s[36:37]
7818; GFX1064-NEXT:    s_mov_b32 s12, s43
7819; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
7820; GFX1064-NEXT:    s_mov_b32 s13, s42
7821; GFX1064-NEXT:    s_mov_b32 s14, s33
7822; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
7823; GFX1064-NEXT:    s_movk_i32 s32, 0x800
7824; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
7825; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7826; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
7827; GFX1064-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
7828; GFX1064-NEXT:    s_mov_b64 s[0:1], exec
7829; GFX1064-NEXT:  .LBB11_1: ; %ComputeLoop
7830; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
7831; GFX1064-NEXT:    s_ff1_i32_b64 s4, s[0:1]
7832; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
7833; GFX1064-NEXT:    v_readlane_b32 s3, v1, s4
7834; GFX1064-NEXT:    v_readlane_b32 s2, v0, s4
7835; GFX1064-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
7836; GFX1064-NEXT:    s_lshl_b64 s[2:3], 1, s4
7837; GFX1064-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[2:3]
7838; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
7839; GFX1064-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
7840; GFX1064-NEXT:    s_cbranch_scc1 .LBB11_1
7841; GFX1064-NEXT:  ; %bb.2: ; %ComputeEnd
7842; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7843; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
7844; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
7845; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
7846; GFX1064-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
7847; GFX1064-NEXT:    s_cbranch_execz .LBB11_5
7848; GFX1064-NEXT:  ; %bb.3:
7849; GFX1064-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
7850; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
7851; GFX1064-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
7852; GFX1064-NEXT:    s_mov_b64 s[46:47], 0
7853; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
7854; GFX1064-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
7855; GFX1064-NEXT:  .LBB11_4: ; %atomicrmw.start
7856; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
7857; GFX1064-NEXT:    s_waitcnt vmcnt(0)
7858; GFX1064-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
7859; GFX1064-NEXT:    s_add_u32 s8, s34, 44
7860; GFX1064-NEXT:    s_addc_u32 s9, s35, 0
7861; GFX1064-NEXT:    s_getpc_b64 s[0:1]
7862; GFX1064-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7863; GFX1064-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7864; GFX1064-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
7865; GFX1064-NEXT:    buffer_store_dword v4, off, s[48:51], 0
7866; GFX1064-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7867; GFX1064-NEXT:    v_mov_b32_e32 v31, v40
7868; GFX1064-NEXT:    v_mov_b32_e32 v2, s44
7869; GFX1064-NEXT:    v_mov_b32_e32 v3, s45
7870; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
7871; GFX1064-NEXT:    v_mov_b32_e32 v5, 8
7872; GFX1064-NEXT:    v_mov_b32_e32 v6, 0
7873; GFX1064-NEXT:    v_mov_b32_e32 v7, 0
7874; GFX1064-NEXT:    s_mov_b64 s[0:1], s[48:49]
7875; GFX1064-NEXT:    s_mov_b64 s[4:5], s[40:41]
7876; GFX1064-NEXT:    s_mov_b64 s[6:7], s[38:39]
7877; GFX1064-NEXT:    s_mov_b64 s[10:11], s[36:37]
7878; GFX1064-NEXT:    s_mov_b32 s12, s43
7879; GFX1064-NEXT:    s_mov_b32 s13, s42
7880; GFX1064-NEXT:    s_mov_b32 s14, s33
7881; GFX1064-NEXT:    s_mov_b64 s[2:3], s[50:51]
7882; GFX1064-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
7883; GFX1064-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
7884; GFX1064-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
7885; GFX1064-NEXT:    v_mov_b32_e32 v0, 8
7886; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
7887; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
7888; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
7889; GFX1064-NEXT:    s_clause 0x1
7890; GFX1064-NEXT:    buffer_load_dword v4, off, s[48:51], 0
7891; GFX1064-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
7892; GFX1064-NEXT:    v_and_b32_e32 v0, 1, v0
7893; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
7894; GFX1064-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
7895; GFX1064-NEXT:    s_andn2_b64 exec, exec, s[46:47]
7896; GFX1064-NEXT:    s_cbranch_execnz .LBB11_4
7897; GFX1064-NEXT:  .LBB11_5:
7898; GFX1064-NEXT:    s_endpgm
7899;
; Expected gfx1010 wave32 output with the Iterative atomic-optimizer strategy
; (see the llc invocations at the top of the file). These assertions are
; autogenerated; regenerate them with utils/update_llc_test_checks.py rather
; than editing the instruction lines by hand.
7900; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
7901; GFX1032:       ; %bb.0:
; Build the scratch buffer descriptor in s[48:51] and stash the incoming SGPR
; inputs in s33-s43 so they can be copied back before each call.
7902; GFX1032-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
7903; GFX1032-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
7904; GFX1032-NEXT:    s_mov_b32 s50, -1
7905; GFX1032-NEXT:    s_mov_b32 s51, 0x31c16000
7906; GFX1032-NEXT:    s_add_u32 s48, s48, s11
7907; GFX1032-NEXT:    s_mov_b64 s[34:35], s[4:5]
7908; GFX1032-NEXT:    s_addc_u32 s49, s49, 0
7909; GFX1032-NEXT:    s_mov_b32 s43, s8
7910; GFX1032-NEXT:    s_add_u32 s8, s34, 44
7911; GFX1032-NEXT:    s_mov_b32 s42, s9
7912; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
7913; GFX1032-NEXT:    s_mov_b64 s[40:41], s[0:1]
; Resolve div.double.value through its GOT entry; it is called below.
7914; GFX1032-NEXT:    s_getpc_b64 s[0:1]
7915; GFX1032-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
7916; GFX1032-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
7917; GFX1032-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
7918; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7919; GFX1032-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
7920; GFX1032-NEXT:    s_mov_b32 s33, s10
7921; GFX1032-NEXT:    s_mov_b64 s[36:37], s[6:7]
7922; GFX1032-NEXT:    s_mov_b64 s[38:39], s[2:3]
7923; GFX1032-NEXT:    s_mov_b64 s[6:7], s[2:3]
; v40 = v0 | (v1 << 10) | (v2 << 20); it is passed in v31 to both calls.
7924; GFX1032-NEXT:    v_or3_b32 v40, v0, v1, v2
7925; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
7926; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
7927; GFX1032-NEXT:    s_mov_b64 s[10:11], s[36:37]
7928; GFX1032-NEXT:    s_mov_b32 s12, s43
7929; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
7930; GFX1032-NEXT:    s_mov_b32 s13, s42
7931; GFX1032-NEXT:    s_mov_b32 s14, s33
7932; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
7933; GFX1032-NEXT:    s_movk_i32 s32, 0x400
7934; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
7935; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; Seed the accumulator v[2:3] with 0x7ff80000_00000000 and walk a copy of the
; exec mask held in s0. v[0:1] is consumed below as the per-lane operand.
7936; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
7937; GFX1032-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
7938; GFX1032-NEXT:    s_mov_b32 s0, exec_lo
7939; GFX1032-NEXT:  .LBB11_1: ; %ComputeLoop
7940; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
; Each iteration takes the lowest set bit of s0, reads that lane's v[0:1] into
; s[2:3], clears the bit, and folds the value into v[2:3] with v_max_f64.
7941; GFX1032-NEXT:    s_ff1_i32_b32 s1, s0
7942; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
7943; GFX1032-NEXT:    v_readlane_b32 s3, v1, s1
7944; GFX1032-NEXT:    v_readlane_b32 s2, v0, s1
7945; GFX1032-NEXT:    s_lshl_b32 s1, 1, s1
7946; GFX1032-NEXT:    s_andn2_b32 s0, s0, s1
7947; GFX1032-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
7948; GFX1032-NEXT:    s_cmp_lg_u32 s0, 0
7949; GFX1032-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
7950; GFX1032-NEXT:    s_cbranch_scc1 .LBB11_1
7951; GFX1032-NEXT:  ; %bb.2: ; %ComputeEnd
; Only lanes whose mbcnt result is 0 stay enabled; the rest skip to the exit
; label .LBB11_5 below.
7952; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
7953; GFX1032-NEXT:    s_mov_b32 s46, 0
7954; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
7955; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
7956; GFX1032-NEXT:    s_xor_b32 s0, exec_lo, s0
7957; GFX1032-NEXT:    s_cbranch_execz .LBB11_5
7958; GFX1032-NEXT:  ; %bb.3:
; Load a 64-bit address from s[34:35]+0x24 (presumably the kernel's pointer
; argument) and fetch the current value it points at into v[4:5]; v[41:42]
; keeps the reduced operand across the calls in the loop.
7959; GFX1032-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
7960; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
7961; GFX1032-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
7962; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
7963; GFX1032-NEXT:    global_load_dwordx2 v[4:5], v0, s[44:45]
7964; GFX1032-NEXT:  .LBB11_4: ; %atomicrmw.start
7965; GFX1032-NEXT:    ; =>This Inner Loop Header: Depth=1
; Compare-exchange retry loop: the value in v[4:5] is stored at scratch offset
; 0 and max(v[4:5], v[41:42]) at scratch offset 8, then
; __atomic_compare_exchange is called through its GOT entry.
7966; GFX1032-NEXT:    s_waitcnt vmcnt(0)
7967; GFX1032-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
7968; GFX1032-NEXT:    s_add_u32 s8, s34, 44
7969; GFX1032-NEXT:    s_addc_u32 s9, s35, 0
7970; GFX1032-NEXT:    s_getpc_b64 s[0:1]
7971; GFX1032-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
7972; GFX1032-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
7973; GFX1032-NEXT:    buffer_store_dword v5, off, s[48:51], 0 offset:4
7974; GFX1032-NEXT:    buffer_store_dword v4, off, s[48:51], 0
7975; GFX1032-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
7976; GFX1032-NEXT:    v_mov_b32_e32 v31, v40
7977; GFX1032-NEXT:    v_mov_b32_e32 v2, s44
7978; GFX1032-NEXT:    v_mov_b32_e32 v3, s45
7979; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
7980; GFX1032-NEXT:    v_mov_b32_e32 v5, 8
7981; GFX1032-NEXT:    v_mov_b32_e32 v6, 0
7982; GFX1032-NEXT:    v_mov_b32_e32 v7, 0
7983; GFX1032-NEXT:    s_mov_b64 s[0:1], s[48:49]
7984; GFX1032-NEXT:    s_mov_b64 s[4:5], s[40:41]
7985; GFX1032-NEXT:    s_mov_b64 s[6:7], s[38:39]
7986; GFX1032-NEXT:    s_mov_b64 s[10:11], s[36:37]
7987; GFX1032-NEXT:    s_mov_b32 s12, s43
7988; GFX1032-NEXT:    s_mov_b32 s13, s42
7989; GFX1032-NEXT:    s_mov_b32 s14, s33
7990; GFX1032-NEXT:    s_mov_b64 s[2:3], s[50:51]
7991; GFX1032-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
7992; GFX1032-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
7993; GFX1032-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
7994; GFX1032-NEXT:    v_mov_b32_e32 v0, 8
7995; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
7996; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
7997; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; Reload v[4:5] from scratch offset 0, then retire lanes whose return value in
; v0 has bit 0 set (accumulated in s46); loop while any lane is still enabled.
7998; GFX1032-NEXT:    s_clause 0x1
7999; GFX1032-NEXT:    buffer_load_dword v4, off, s[48:51], 0
8000; GFX1032-NEXT:    buffer_load_dword v5, off, s[48:51], 0 offset:4
8001; GFX1032-NEXT:    v_and_b32_e32 v0, 1, v0
8002; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
8003; GFX1032-NEXT:    s_or_b32 s46, vcc_lo, s46
8004; GFX1032-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
8005; GFX1032-NEXT:    s_cbranch_execnz .LBB11_4
8006; GFX1032-NEXT:  .LBB11_5:
8007; GFX1032-NEXT:    s_endpgm
8008;
; Expected gfx1100 wave64 output with the Iterative atomic-optimizer strategy
; (see the llc invocations at the top of the file). These assertions are
; autogenerated; regenerate them with utils/update_llc_test_checks.py rather
; than editing the instruction lines by hand.
8009; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8010; GFX1164:       ; %bb.0:
; Stash the incoming SGPR inputs in s33-s43, then resolve div.double.value
; through its GOT entry and call it. No buffer descriptor is built here;
; scratch is accessed with scratch_store/scratch_load further down.
8011; GFX1164-NEXT:    s_mov_b64 s[34:35], s[4:5]
8012; GFX1164-NEXT:    s_mov_b32 s43, s8
8013; GFX1164-NEXT:    s_add_u32 s8, s34, 44
8014; GFX1164-NEXT:    s_mov_b32 s42, s9
8015; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
8016; GFX1164-NEXT:    s_mov_b64 s[40:41], s[0:1]
8017; GFX1164-NEXT:    s_getpc_b64 s[0:1]
8018; GFX1164-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8019; GFX1164-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8020; GFX1164-NEXT:    v_mov_b32_e32 v31, v0
8021; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8022; GFX1164-NEXT:    s_mov_b32 s33, s10
8023; GFX1164-NEXT:    s_mov_b64 s[36:37], s[6:7]
8024; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
8025; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
8026; GFX1164-NEXT:    s_mov_b64 s[10:11], s[36:37]
8027; GFX1164-NEXT:    s_mov_b32 s12, s43
8028; GFX1164-NEXT:    s_mov_b32 s13, s42
8029; GFX1164-NEXT:    s_mov_b32 s14, s33
8030; GFX1164-NEXT:    s_mov_b32 s32, 32
; v40 keeps a copy of the incoming v0 so it can be passed in v31 again inside
; the retry loop.
8031; GFX1164-NEXT:    v_mov_b32_e32 v40, v0
8032; GFX1164-NEXT:    s_mov_b64 s[38:39], s[2:3]
8033; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
8034; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; Seed the accumulator v[2:3] with 0x7ff80000_00000000 and walk a copy of the
; 64-bit exec mask held in s[0:1].
8035; GFX1164-NEXT:    v_mov_b32_e32 v2, 0
8036; GFX1164-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
8037; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
8038; GFX1164-NEXT:  .LBB11_1: ; %ComputeLoop
8039; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
; Each iteration takes the lowest set bit of s[0:1], reads that lane's v[0:1]
; into s[2:3], folds it into v[2:3] with v_max_f64, and clears the bit.
8040; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8041; GFX1164-NEXT:    s_ctz_i32_b64 s4, s[0:1]
8042; GFX1164-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
8043; GFX1164-NEXT:    v_readlane_b32 s3, v1, s4
8044; GFX1164-NEXT:    v_readlane_b32 s2, v0, s4
8045; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
8046; GFX1164-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
8047; GFX1164-NEXT:    s_lshl_b64 s[2:3], 1, s4
8048; GFX1164-NEXT:    s_and_not1_b64 s[0:1], s[0:1], s[2:3]
8049; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8050; GFX1164-NEXT:    s_cmp_lg_u64 s[0:1], 0
8051; GFX1164-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
8052; GFX1164-NEXT:    s_cbranch_scc1 .LBB11_1
8053; GFX1164-NEXT:  ; %bb.2: ; %ComputeEnd
; Only lanes whose combined mbcnt_lo/mbcnt_hi result is 0 stay enabled
; (v_cmpx writes exec directly); the rest skip to the exit label .LBB11_5.
8054; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
8055; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
8056; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8057; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
8058; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
8059; GFX1164-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
8060; GFX1164-NEXT:    s_cbranch_execz .LBB11_5
8061; GFX1164-NEXT:  ; %bb.3:
; Load a 64-bit address from s[34:35]+0x24 (presumably the kernel's pointer
; argument) and fetch the current value it points at into v[4:5]; v[41:42]
; keeps the reduced operand and s[46:47] starts as an empty "done" mask.
8062; GFX1164-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
8063; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
8064; GFX1164-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
8065; GFX1164-NEXT:    s_mov_b64 s[46:47], 0
8066; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
8067; GFX1164-NEXT:    global_load_b64 v[4:5], v0, s[44:45]
8068; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x1
8069; GFX1164-NEXT:    .p2align 6
8070; GFX1164-NEXT:  .LBB11_4: ; %atomicrmw.start
8071; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
; Compare-exchange retry loop: the value in v[4:5] is stored at scratch offset
; 0 and max(v[4:5], v[41:42]) at scratch offset 8, then
; __atomic_compare_exchange is called through its GOT entry.
8072; GFX1164-NEXT:    s_waitcnt vmcnt(0)
8073; GFX1164-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
8074; GFX1164-NEXT:    s_add_u32 s8, s34, 44
8075; GFX1164-NEXT:    s_addc_u32 s9, s35, 0
8076; GFX1164-NEXT:    s_getpc_b64 s[0:1]
8077; GFX1164-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8078; GFX1164-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8079; GFX1164-NEXT:    v_mov_b32_e32 v31, v40
8080; GFX1164-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8081; GFX1164-NEXT:    v_mov_b32_e32 v2, s44
8082; GFX1164-NEXT:    v_mov_b32_e32 v3, s45
8083; GFX1164-NEXT:    v_mov_b32_e32 v6, 0
8084; GFX1164-NEXT:    v_mov_b32_e32 v7, 0
8085; GFX1164-NEXT:    s_mov_b64 s[4:5], s[40:41]
8086; GFX1164-NEXT:    s_mov_b64 s[6:7], s[38:39]
8087; GFX1164-NEXT:    s_mov_b64 s[10:11], s[36:37]
8088; GFX1164-NEXT:    s_mov_b32 s12, s43
8089; GFX1164-NEXT:    s_mov_b32 s13, s42
8090; GFX1164-NEXT:    s_mov_b32 s14, s33
8091; GFX1164-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
8092; GFX1164-NEXT:    scratch_store_b64 off, v[4:5], off
8093; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
8094; GFX1164-NEXT:    v_mov_b32_e32 v5, 8
8095; GFX1164-NEXT:    scratch_store_b64 off, v[0:1], off offset:8
8096; GFX1164-NEXT:    v_mov_b32_e32 v0, 8
8097; GFX1164-NEXT:    v_mov_b32_e32 v1, 0
8098; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
8099; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; Reload v[4:5] from scratch offset 0, then retire lanes whose return value in
; v0 has bit 0 set (accumulated in s[46:47]); loop while any lane is enabled.
8100; GFX1164-NEXT:    scratch_load_b64 v[4:5], off, off
8101; GFX1164-NEXT:    v_and_b32_e32 v0, 1, v0
8102; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
8103; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
8104; GFX1164-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
8105; GFX1164-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
8106; GFX1164-NEXT:    s_cbranch_execnz .LBB11_4
8107; GFX1164-NEXT:  .LBB11_5:
8108; GFX1164-NEXT:    s_set_inst_prefetch_distance 0x2
8109; GFX1164-NEXT:    s_endpgm
8110;
; Expected gfx1100 wave32 output with the Iterative atomic-optimizer strategy
; (see the llc invocations at the top of the file). These assertions are
; autogenerated; regenerate them with utils/update_llc_test_checks.py rather
; than editing the instruction lines by hand.
8111; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8112; GFX1132:       ; %bb.0:
; Stash the incoming SGPR inputs (here s13/s14/s15 are saved to s43/s42/s33),
; then resolve div.double.value through its GOT entry and call it.
8113; GFX1132-NEXT:    s_mov_b64 s[34:35], s[4:5]
8114; GFX1132-NEXT:    s_mov_b64 s[40:41], s[0:1]
8115; GFX1132-NEXT:    s_add_u32 s8, s34, 44
8116; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
8117; GFX1132-NEXT:    s_getpc_b64 s[0:1]
8118; GFX1132-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8119; GFX1132-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8120; GFX1132-NEXT:    v_mov_b32_e32 v31, v0
8121; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8122; GFX1132-NEXT:    s_mov_b64 s[36:37], s[6:7]
8123; GFX1132-NEXT:    s_mov_b32 s42, s14
8124; GFX1132-NEXT:    s_mov_b32 s43, s13
8125; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
8126; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
8127; GFX1132-NEXT:    s_mov_b64 s[10:11], s[36:37]
8128; GFX1132-NEXT:    s_mov_b32 s12, s13
8129; GFX1132-NEXT:    s_mov_b32 s13, s14
8130; GFX1132-NEXT:    s_mov_b32 s14, s15
8131; GFX1132-NEXT:    s_mov_b32 s32, 32
8132; GFX1132-NEXT:    s_mov_b32 s33, s15
; v40 keeps a copy of the incoming v0 so it can be passed in v31 again inside
; the retry loop.
8133; GFX1132-NEXT:    v_mov_b32_e32 v40, v0
8134; GFX1132-NEXT:    s_mov_b64 s[38:39], s[2:3]
8135; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
8136; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; Seed the accumulator v[2:3] with 0x7ff80000_00000000 and walk a copy of the
; exec mask held in s0.
8137; GFX1132-NEXT:    v_mov_b32_e32 v2, 0
8138; GFX1132-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
8139; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
8140; GFX1132-NEXT:  .LBB11_1: ; %ComputeLoop
8141; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
; Each iteration takes the lowest set bit of s0, reads that lane's v[0:1] into
; s[2:3], clears the bit, and folds the value into v[2:3] with v_max_f64.
8142; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8143; GFX1132-NEXT:    s_ctz_i32_b32 s1, s0
8144; GFX1132-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
8145; GFX1132-NEXT:    v_readlane_b32 s3, v1, s1
8146; GFX1132-NEXT:    v_readlane_b32 s2, v0, s1
8147; GFX1132-NEXT:    s_lshl_b32 s1, 1, s1
8148; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8149; GFX1132-NEXT:    s_and_not1_b32 s0, s0, s1
8150; GFX1132-NEXT:    v_max_f64 v[4:5], s[2:3], s[2:3]
8151; GFX1132-NEXT:    s_cmp_lg_u32 s0, 0
8152; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8153; GFX1132-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
8154; GFX1132-NEXT:    s_cbranch_scc1 .LBB11_1
8155; GFX1132-NEXT:  ; %bb.2: ; %ComputeEnd
; Only lanes whose mbcnt result is 0 stay enabled (v_cmpx writes exec
; directly); the rest skip to the exit label .LBB11_5 below.
8156; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
8157; GFX1132-NEXT:    s_mov_b32 s46, 0
8158; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
8159; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8160; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
8161; GFX1132-NEXT:    s_xor_b32 s0, exec_lo, s0
8162; GFX1132-NEXT:    s_cbranch_execz .LBB11_5
8163; GFX1132-NEXT:  ; %bb.3:
; Load a 64-bit address from s[34:35]+0x24 (presumably the kernel's pointer
; argument) and fetch the current value it points at into v[4:5]; v[41:42]
; keeps the reduced operand across the calls in the loop.
8164; GFX1132-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
8165; GFX1132-NEXT:    v_mov_b32_e32 v0, 0
8166; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_4)
8167; GFX1132-NEXT:    v_max_f64 v[41:42], v[2:3], v[2:3]
8168; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
8169; GFX1132-NEXT:    global_load_b64 v[4:5], v0, s[44:45]
8170; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x1
8171; GFX1132-NEXT:    .p2align 6
8172; GFX1132-NEXT:  .LBB11_4: ; %atomicrmw.start
8173; GFX1132-NEXT:    ; =>This Inner Loop Header: Depth=1
; Compare-exchange retry loop: the value in v[4:5] is stored at scratch offset
; 0 and max(v[4:5], v[41:42]) at scratch offset 8, then
; __atomic_compare_exchange is called through its GOT entry.
8174; GFX1132-NEXT:    s_waitcnt vmcnt(0)
8175; GFX1132-NEXT:    v_max_f64 v[0:1], v[4:5], v[4:5]
8176; GFX1132-NEXT:    s_add_u32 s8, s34, 44
8177; GFX1132-NEXT:    s_addc_u32 s9, s35, 0
8178; GFX1132-NEXT:    s_getpc_b64 s[0:1]
8179; GFX1132-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8180; GFX1132-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8181; GFX1132-NEXT:    v_mov_b32_e32 v31, v40
8182; GFX1132-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8183; GFX1132-NEXT:    v_mov_b32_e32 v3, s45
8184; GFX1132-NEXT:    v_mov_b32_e32 v7, 0
8185; GFX1132-NEXT:    s_mov_b64 s[4:5], s[40:41]
8186; GFX1132-NEXT:    s_mov_b64 s[6:7], s[38:39]
8187; GFX1132-NEXT:    s_mov_b64 s[10:11], s[36:37]
8188; GFX1132-NEXT:    s_mov_b32 s12, s43
8189; GFX1132-NEXT:    s_mov_b32 s13, s42
8190; GFX1132-NEXT:    s_mov_b32 s14, s33
8191; GFX1132-NEXT:    v_mov_b32_e32 v6, 0
8192; GFX1132-NEXT:    v_mov_b32_e32 v2, s44
8193; GFX1132-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
8194; GFX1132-NEXT:    scratch_store_b64 off, v[4:5], off
8195; GFX1132-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
8196; GFX1132-NEXT:    scratch_store_b64 off, v[0:1], off offset:8
8197; GFX1132-NEXT:    v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
8198; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
8199; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; Reload v[4:5] from scratch offset 0, then retire lanes whose return value in
; v0 has bit 0 set (accumulated in s46); loop while any lane is still enabled.
8200; GFX1132-NEXT:    scratch_load_b64 v[4:5], off, off
8201; GFX1132-NEXT:    v_and_b32_e32 v0, 1, v0
8202; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
8203; GFX1132-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
8204; GFX1132-NEXT:    s_or_b32 s46, vcc_lo, s46
8205; GFX1132-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
8206; GFX1132-NEXT:    s_cbranch_execnz .LBB11_4
8207; GFX1132-NEXT:  .LBB11_5:
8208; GFX1132-NEXT:    s_set_inst_prefetch_distance 0x2
8209; GFX1132-NEXT:    s_endpgm
8210;
; Expected output for the default amdgcn target (no -mcpu) with the DPP
; atomic-optimizer strategy (see the llc invocations at the top of the file).
; No cross-lane reduction or lane-selection code appears in this variant:
; every enabled lane enters the compare-exchange loop with its own operand.
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
8211; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8212; GFX7LESS-DPP:       ; %bb.0:
; Build the scratch buffer descriptor in s[48:51] and stash the incoming SGPR
; inputs in s33-s43 so they can be copied back before each call.
8213; GFX7LESS-DPP-NEXT:    s_movk_i32 s32, 0x800
8214; GFX7LESS-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
8215; GFX7LESS-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
8216; GFX7LESS-DPP-NEXT:    s_mov_b32 s50, -1
8217; GFX7LESS-DPP-NEXT:    s_mov_b32 s51, 0xe8f000
8218; GFX7LESS-DPP-NEXT:    s_add_u32 s48, s48, s11
8219; GFX7LESS-DPP-NEXT:    s_addc_u32 s49, s49, 0
8220; GFX7LESS-DPP-NEXT:    s_mov_b32 s33, s10
8221; GFX7LESS-DPP-NEXT:    s_mov_b32 s42, s9
8222; GFX7LESS-DPP-NEXT:    s_mov_b32 s43, s8
8223; GFX7LESS-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
8224; GFX7LESS-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
8225; GFX7LESS-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
8226; GFX7LESS-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
; s[44:45] receives a 64-bit address loaded from s[4:5]+0x9 (presumably the
; kernel's pointer argument); s[46:47] completes a buffer descriptor that is
; used for the initial load after the call.
8227; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[44:45], s[4:5], 0x9
8228; GFX7LESS-DPP-NEXT:    s_mov_b32 s47, 0xf000
8229; GFX7LESS-DPP-NEXT:    s_mov_b32 s46, -1
8230; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
8231; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
; Resolve div.double.value through its GOT entry and call it.
8232; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
8233; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8234; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8235; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8236; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
8237; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
8238; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
; v40 = v0 | (v1 << 10) | (v2 << 20); it is passed in v31 to both calls.
8239; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v40, v0, v2
8240; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8241; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8242; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
8243; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
8244; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
8245; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
8246; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
8247; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8248; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8249; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8250; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; Fetch the current memory value into v[2:3], then reuse s[46:47] as the
; (initially empty) "done" mask; v[41:42] keeps this lane's operand.
8251; GFX7LESS-DPP-NEXT:    buffer_load_dwordx2 v[2:3], off, s[44:47], 0
8252; GFX7LESS-DPP-NEXT:    s_mov_b64 s[46:47], 0
8253; GFX7LESS-DPP-NEXT:    v_max_f64 v[41:42], v[0:1], v[0:1]
8254; GFX7LESS-DPP-NEXT:  .LBB11_1: ; %atomicrmw.start
8255; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
; Compare-exchange retry loop: the value in v[2:3] is stored at scratch offset
; 0 and max(v[2:3], v[41:42]) at scratch offset 8, then
; __atomic_compare_exchange is called through its GOT entry.
8256; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
8257; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
8258; GFX7LESS-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:4
8259; GFX7LESS-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0
8260; GFX7LESS-DPP-NEXT:    s_add_u32 s8, s36, 44
8261; GFX7LESS-DPP-NEXT:    v_max_f64 v[0:1], v[0:1], v[41:42]
8262; GFX7LESS-DPP-NEXT:    s_addc_u32 s9, s37, 0
8263; GFX7LESS-DPP-NEXT:    s_getpc_b64 s[0:1]
8264; GFX7LESS-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8265; GFX7LESS-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8266; GFX7LESS-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0 offset:12
8267; GFX7LESS-DPP-NEXT:    buffer_store_dword v0, off, s[48:51], 0 offset:8
8268; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8269; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
8270; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v0, 8
8271; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, 0
8272; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v4, 0
8273; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v5, 8
8274; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v6, 0
8275; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v7, 0
8276; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8277; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8278; GFX7LESS-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
8279; GFX7LESS-DPP-NEXT:    s_mov_b32 s12, s43
8280; GFX7LESS-DPP-NEXT:    s_mov_b32 s13, s42
8281; GFX7LESS-DPP-NEXT:    s_mov_b32 s14, s33
8282; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v31, v40
8283; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8284; GFX7LESS-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8285; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, s44
8286; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, s45
8287; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8288; GFX7LESS-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; Reload v[2:3] from scratch offset 0, then retire lanes whose return value in
; v0 has bit 0 set (accumulated in s[46:47]); loop while any lane is enabled.
8289; GFX7LESS-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8290; GFX7LESS-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0
8291; GFX7LESS-DPP-NEXT:    buffer_load_dword v3, off, s[48:51], 0 offset:4
8292; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
8293; GFX7LESS-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
8294; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
8295; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB11_1
8296; GFX7LESS-DPP-NEXT:  ; %bb.2: ; %atomicrmw.end
8297; GFX7LESS-DPP-NEXT:    s_endpgm
8298;
; Expected gfx900 output with the DPP atomic-optimizer strategy (see the llc
; invocations at the top of the file). These assertions are autogenerated;
; regenerate them with utils/update_llc_test_checks.py rather than editing the
; instruction lines by hand.
8299; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8300; GFX9-DPP:       ; %bb.0:
; Build the scratch buffer descriptor in s[52:55] and stash the incoming SGPR
; inputs in s33-s43 so they can be copied back before each call.
8301; GFX9-DPP-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
8302; GFX9-DPP-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
8303; GFX9-DPP-NEXT:    s_mov_b32 s54, -1
8304; GFX9-DPP-NEXT:    s_mov_b32 s55, 0xe00000
8305; GFX9-DPP-NEXT:    s_add_u32 s52, s52, s11
8306; GFX9-DPP-NEXT:    s_addc_u32 s53, s53, 0
8307; GFX9-DPP-NEXT:    s_mov_b64 s[36:37], s[4:5]
8308; GFX9-DPP-NEXT:    s_mov_b32 s43, s8
8309; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
8310; GFX9-DPP-NEXT:    s_mov_b32 s42, s9
8311; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
8312; GFX9-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
; Resolve div.double.value through its GOT entry and call it.
8313; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
8314; GFX9-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8315; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8316; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8317; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
8318; GFX9-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
8319; GFX9-DPP-NEXT:    s_mov_b32 s33, s10
8320; GFX9-DPP-NEXT:    s_mov_b64 s[34:35], s[6:7]
8321; GFX9-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
; v40 = v0 | (v1 << 10) | (v2 << 20); it is passed in v31 to both calls.
8322; GFX9-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
8323; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8324; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[52:53]
8325; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8326; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
8327; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
8328; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
8329; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
8330; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
8331; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[54:55]
8332; GFX9-DPP-NEXT:    s_movk_i32 s32, 0x800
8333; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8334; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; With every lane enabled (s_or_saveexec -1), v[10:11] takes v[0:1] for lanes
; set in the saved exec mask s[0:1] and 0x7ff80000_00000000 for the others.
8335; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
8336; GFX9-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
8337; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v11, v9, v1, s[0:1]
8338; GFX9-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
8339; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
8340; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
8341; GFX9-DPP-NEXT:    v_mov_b32_e32 v8, 0
; DPP steps row_shr:1/2/4/8, row_bcast:15 and row_bcast:31 follow. Each one
; moves another lane's partial result into a temporary that was pre-filled
; with 0x7ff80000_00000000 and combines it into the running value with
; v_max_f64.
8342; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
8343; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
8344; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8345; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
8346; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
8347; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
8348; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
8349; GFX9-DPP-NEXT:    s_nop 0
8350; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
8351; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
8352; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
8353; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
8354; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
8355; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
8356; GFX9-DPP-NEXT:    s_nop 0
8357; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
8358; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
8359; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
8360; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
8361; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
8362; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
8363; GFX9-DPP-NEXT:    s_nop 0
8364; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
8365; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
8366; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
8367; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
8368; GFX9-DPP-NEXT:    v_mov_b32_e32 v13, 0x7ff80000
8369; GFX9-DPP-NEXT:    v_mov_b32_e32 v12, 0
8370; GFX9-DPP-NEXT:    s_nop 0
8371; GFX9-DPP-NEXT:    v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
8372; GFX9-DPP-NEXT:    v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
8373; GFX9-DPP-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
8374; GFX9-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[12:13]
8375; GFX9-DPP-NEXT:    s_nop 1
8376; GFX9-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
8377; GFX9-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
8378; GFX9-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
8379; GFX9-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
; Restore exec, compute each lane's mbcnt, then (with all lanes enabled again)
; read lane 63 of v[8:9] into s[44:45]; that pair is the operand used in the
; retry loop below.
8380; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
8381; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 0
8382; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
8383; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
8384; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
8385; GFX9-DPP-NEXT:    v_readlane_b32 s45, v9, 63
8386; GFX9-DPP-NEXT:    v_readlane_b32 s44, v8, 63
8387; GFX9-DPP-NEXT:    s_mov_b64 exec, s[0:1]
; Only lanes whose mbcnt result is 0 stay enabled; the rest skip to the exit
; label .LBB11_3 below.
8388; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
8389; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
8390; GFX9-DPP-NEXT:    s_cbranch_execz .LBB11_3
8391; GFX9-DPP-NEXT:  ; %bb.1:
; Load a 64-bit address from s[36:37]+0x24 (presumably the kernel's pointer
; argument) and fetch the current value it points at into v[1:2]; s[48:49]
; starts as an empty "done" mask.
8392; GFX9-DPP-NEXT:    s_load_dwordx2 s[46:47], s[36:37], 0x24
8393; GFX9-DPP-NEXT:    s_mov_b64 s[48:49], 0
8394; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8395; GFX9-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[46:47]
8396; GFX9-DPP-NEXT:  .LBB11_2: ; %atomicrmw.start
8397; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
; Compare-exchange retry loop: the value in v[1:2] is stored at scratch offset
; 0 and max(v[1:2], s[44:45]) at scratch offset 8, then
; __atomic_compare_exchange is called through its GOT entry.
8398; GFX9-DPP-NEXT:    v_max_f64 v[3:4], s[44:45], s[44:45]
8399; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
8400; GFX9-DPP-NEXT:    v_max_f64 v[5:6], v[1:2], v[1:2]
8401; GFX9-DPP-NEXT:    s_add_u32 s8, s36, 44
8402; GFX9-DPP-NEXT:    s_addc_u32 s9, s37, 0
8403; GFX9-DPP-NEXT:    s_getpc_b64 s[0:1]
8404; GFX9-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8405; GFX9-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8406; GFX9-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8407; GFX9-DPP-NEXT:    s_mov_b64 s[0:1], s[52:53]
8408; GFX9-DPP-NEXT:    buffer_store_dword v2, off, s[52:55], 0 offset:4
8409; GFX9-DPP-NEXT:    buffer_store_dword v1, off, s[52:55], 0
8410; GFX9-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8411; GFX9-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
8412; GFX9-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8413; GFX9-DPP-NEXT:    s_mov_b64 s[10:11], s[34:35]
8414; GFX9-DPP-NEXT:    s_mov_b32 s12, s43
8415; GFX9-DPP-NEXT:    s_mov_b32 s13, s42
8416; GFX9-DPP-NEXT:    s_mov_b32 s14, s33
8417; GFX9-DPP-NEXT:    v_mov_b32_e32 v31, v40
8418; GFX9-DPP-NEXT:    buffer_store_dword v4, off, s[52:55], 0 offset:12
8419; GFX9-DPP-NEXT:    buffer_store_dword v3, off, s[52:55], 0 offset:8
8420; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[54:55]
8421; GFX9-DPP-NEXT:    v_mov_b32_e32 v0, 8
8422; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, 0
8423; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, s46
8424; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, s47
8425; GFX9-DPP-NEXT:    v_mov_b32_e32 v4, 0
8426; GFX9-DPP-NEXT:    v_mov_b32_e32 v5, 8
8427; GFX9-DPP-NEXT:    v_mov_b32_e32 v6, 0
8428; GFX9-DPP-NEXT:    v_mov_b32_e32 v7, 0
8429; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8430; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; Reload v[1:2] from scratch offset 0, then retire lanes whose return value in
; v0 has bit 0 set (accumulated in s[48:49]); loop while any lane is enabled.
8431; GFX9-DPP-NEXT:    buffer_load_dword v1, off, s[52:55], 0
8432; GFX9-DPP-NEXT:    buffer_load_dword v2, off, s[52:55], 0 offset:4
8433; GFX9-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8434; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
8435; GFX9-DPP-NEXT:    s_or_b64 s[48:49], vcc, s[48:49]
8436; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[48:49]
8437; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB11_2
8438; GFX9-DPP-NEXT:  .LBB11_3:
8439; GFX9-DPP-NEXT:    s_endpgm
8440;
8441; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8442; GFX1064-DPP:       ; %bb.0:
8443; GFX1064-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
8444; GFX1064-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
8445; GFX1064-DPP-NEXT:    s_mov_b32 s50, -1
8446; GFX1064-DPP-NEXT:    s_mov_b32 s51, 0x31e16000
8447; GFX1064-DPP-NEXT:    s_add_u32 s48, s48, s11
8448; GFX1064-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
8449; GFX1064-DPP-NEXT:    s_addc_u32 s49, s49, 0
8450; GFX1064-DPP-NEXT:    s_mov_b32 s43, s8
8451; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
8452; GFX1064-DPP-NEXT:    s_mov_b32 s42, s9
8453; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
8454; GFX1064-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
8455; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
8456; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8457; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8458; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
8459; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8460; GFX1064-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
8461; GFX1064-DPP-NEXT:    s_mov_b32 s33, s10
8462; GFX1064-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
8463; GFX1064-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
8464; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8465; GFX1064-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
8466; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8467; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8468; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8469; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
8470; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
8471; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
8472; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
8473; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8474; GFX1064-DPP-NEXT:    s_movk_i32 s32, 0x800
8475; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8476; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
8477; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
8478; GFX1064-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
8479; GFX1064-DPP-NEXT:    v_mov_b32_e32 v8, 0
8480; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1]
8481; GFX1064-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
8482; GFX1064-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
8483; GFX1064-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
8484; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8485; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
8486; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
8487; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8488; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
8489; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
8490; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
8491; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8492; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8493; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8494; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
8495; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
8496; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
8497; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8498; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8499; GFX1064-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8500; GFX1064-DPP-NEXT:    v_mov_b32_e32 v10, 0
8501; GFX1064-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
8502; GFX1064-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
8503; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8504; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8505; GFX1064-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
8506; GFX1064-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
8507; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8508; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8509; GFX1064-DPP-NEXT:    v_readlane_b32 s3, v9, 0
8510; GFX1064-DPP-NEXT:    v_readlane_b32 s5, v9, 32
8511; GFX1064-DPP-NEXT:    v_readlane_b32 s4, v8, 32
8512; GFX1064-DPP-NEXT:    v_readlane_b32 s2, v8, 0
8513; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], s[4:5], s[4:5]
8514; GFX1064-DPP-NEXT:    v_max_f64 v[10:11], s[2:3], s[2:3]
8515; GFX1064-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
8516; GFX1064-DPP-NEXT:    s_mov_b64 exec, s[0:1]
8517; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
8518; GFX1064-DPP-NEXT:    v_mov_b32_e32 v41, v8
8519; GFX1064-DPP-NEXT:    v_mov_b32_e32 v42, v9
8520; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
8521; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
8522; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
8523; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
8524; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB11_3
8525; GFX1064-DPP-NEXT:  ; %bb.1:
8526; GFX1064-DPP-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
8527; GFX1064-DPP-NEXT:    s_mov_b64 s[46:47], 0
8528; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8529; GFX1064-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[44:45]
8530; GFX1064-DPP-NEXT:  .LBB11_2: ; %atomicrmw.start
8531; GFX1064-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
8532; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[41:42], v[41:42]
8533; GFX1064-DPP-NEXT:    s_waitcnt vmcnt(0)
8534; GFX1064-DPP-NEXT:    v_max_f64 v[5:6], v[1:2], v[1:2]
8535; GFX1064-DPP-NEXT:    s_add_u32 s8, s34, 44
8536; GFX1064-DPP-NEXT:    s_addc_u32 s9, s35, 0
8537; GFX1064-DPP-NEXT:    s_getpc_b64 s[0:1]
8538; GFX1064-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8539; GFX1064-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8540; GFX1064-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
8541; GFX1064-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
8542; GFX1064-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8543; GFX1064-DPP-NEXT:    v_mov_b32_e32 v31, v40
8544; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 8
8545; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 0
8546; GFX1064-DPP-NEXT:    v_mov_b32_e32 v2, s44
8547; GFX1064-DPP-NEXT:    v_mov_b32_e32 v7, 0
8548; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8549; GFX1064-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8550; GFX1064-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8551; GFX1064-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8552; GFX1064-DPP-NEXT:    s_mov_b32 s12, s43
8553; GFX1064-DPP-NEXT:    s_mov_b32 s13, s42
8554; GFX1064-DPP-NEXT:    s_mov_b32 s14, s33
8555; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8556; GFX1064-DPP-NEXT:    v_max_f64 v[3:4], v[5:6], v[3:4]
8557; GFX1064-DPP-NEXT:    v_mov_b32_e32 v5, 8
8558; GFX1064-DPP-NEXT:    v_mov_b32_e32 v6, 0
8559; GFX1064-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
8560; GFX1064-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
8561; GFX1064-DPP-NEXT:    v_mov_b32_e32 v3, s45
8562; GFX1064-DPP-NEXT:    v_mov_b32_e32 v4, 0
8563; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8564; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
8565; GFX1064-DPP-NEXT:    s_clause 0x1
8566; GFX1064-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
8567; GFX1064-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
8568; GFX1064-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8569; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
8570; GFX1064-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
8571; GFX1064-DPP-NEXT:    s_andn2_b64 exec, exec, s[46:47]
8572; GFX1064-DPP-NEXT:    s_cbranch_execnz .LBB11_2
8573; GFX1064-DPP-NEXT:  .LBB11_3:
8574; GFX1064-DPP-NEXT:    s_endpgm
8575;
8576; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8577; GFX1032-DPP:       ; %bb.0:
8578; GFX1032-DPP-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
8579; GFX1032-DPP-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
8580; GFX1032-DPP-NEXT:    s_mov_b32 s50, -1
8581; GFX1032-DPP-NEXT:    s_mov_b32 s51, 0x31c16000
8582; GFX1032-DPP-NEXT:    s_add_u32 s48, s48, s11
8583; GFX1032-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
8584; GFX1032-DPP-NEXT:    s_addc_u32 s49, s49, 0
8585; GFX1032-DPP-NEXT:    s_mov_b32 s43, s8
8586; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
8587; GFX1032-DPP-NEXT:    s_mov_b32 s42, s9
8588; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
8589; GFX1032-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
8590; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
8591; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8592; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8593; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
8594; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8595; GFX1032-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
8596; GFX1032-DPP-NEXT:    s_mov_b32 s33, s10
8597; GFX1032-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
8598; GFX1032-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
8599; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8600; GFX1032-DPP-NEXT:    v_or3_b32 v40, v0, v1, v2
8601; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8602; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8603; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8604; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
8605; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
8606; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
8607; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
8608; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8609; GFX1032-DPP-NEXT:    s_movk_i32 s32, 0x400
8610; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8611; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
8612; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
8613; GFX1032-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
8614; GFX1032-DPP-NEXT:    v_mov_b32_e32 v8, 0
8615; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0
8616; GFX1032-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s0
8617; GFX1032-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
8618; GFX1032-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
8619; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8620; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
8621; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
8622; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8623; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
8624; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
8625; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
8626; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8627; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8628; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8629; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
8630; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
8631; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
8632; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8633; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8634; GFX1032-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8635; GFX1032-DPP-NEXT:    v_mov_b32_e32 v10, 0
8636; GFX1032-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
8637; GFX1032-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
8638; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8639; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8640; GFX1032-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
8641; GFX1032-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
8642; GFX1032-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8643; GFX1032-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8644; GFX1032-DPP-NEXT:    s_mov_b32 exec_lo, s0
8645; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, v8
8646; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
8647; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
8648; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, v9
8649; GFX1032-DPP-NEXT:    s_mov_b32 s46, 0
8650; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
8651; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
8652; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB11_3
8653; GFX1032-DPP-NEXT:  ; %bb.1:
8654; GFX1032-DPP-NEXT:    s_load_dwordx2 s[44:45], s[34:35], 0x24
8655; GFX1032-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
8656; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8657; GFX1032-DPP-NEXT:    global_load_dwordx2 v[1:2], v0, s[44:45]
8658; GFX1032-DPP-NEXT:  .LBB11_2: ; %atomicrmw.start
8659; GFX1032-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
8660; GFX1032-DPP-NEXT:    s_waitcnt vmcnt(0)
8661; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
8662; GFX1032-DPP-NEXT:    s_add_u32 s8, s34, 44
8663; GFX1032-DPP-NEXT:    s_addc_u32 s9, s35, 0
8664; GFX1032-DPP-NEXT:    s_getpc_b64 s[0:1]
8665; GFX1032-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8666; GFX1032-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8667; GFX1032-DPP-NEXT:    buffer_store_dword v2, off, s[48:51], 0 offset:4
8668; GFX1032-DPP-NEXT:    buffer_store_dword v1, off, s[48:51], 0
8669; GFX1032-DPP-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
8670; GFX1032-DPP-NEXT:    v_mov_b32_e32 v31, v40
8671; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 8
8672; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 0
8673; GFX1032-DPP-NEXT:    v_mov_b32_e32 v2, s44
8674; GFX1032-DPP-NEXT:    v_mov_b32_e32 v5, 8
8675; GFX1032-DPP-NEXT:    v_mov_b32_e32 v6, 0
8676; GFX1032-DPP-NEXT:    v_mov_b32_e32 v7, 0
8677; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
8678; GFX1032-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8679; GFX1032-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8680; GFX1032-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8681; GFX1032-DPP-NEXT:    s_mov_b32 s12, s43
8682; GFX1032-DPP-NEXT:    s_mov_b32 s13, s42
8683; GFX1032-DPP-NEXT:    s_mov_b32 s14, s33
8684; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[50:51]
8685; GFX1032-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
8686; GFX1032-DPP-NEXT:    buffer_store_dword v4, off, s[48:51], 0 offset:12
8687; GFX1032-DPP-NEXT:    buffer_store_dword v3, off, s[48:51], 0 offset:8
8688; GFX1032-DPP-NEXT:    v_mov_b32_e32 v3, s45
8689; GFX1032-DPP-NEXT:    v_mov_b32_e32 v4, 0
8690; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8691; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
8692; GFX1032-DPP-NEXT:    s_clause 0x1
8693; GFX1032-DPP-NEXT:    buffer_load_dword v1, off, s[48:51], 0
8694; GFX1032-DPP-NEXT:    buffer_load_dword v2, off, s[48:51], 0 offset:4
8695; GFX1032-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8696; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
8697; GFX1032-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
8698; GFX1032-DPP-NEXT:    s_andn2_b32 exec_lo, exec_lo, s46
8699; GFX1032-DPP-NEXT:    s_cbranch_execnz .LBB11_2
8700; GFX1032-DPP-NEXT:  .LBB11_3:
8701; GFX1032-DPP-NEXT:    s_endpgm
8702;
8703; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8704; GFX1164-DPP:       ; %bb.0:
8705; GFX1164-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
8706; GFX1164-DPP-NEXT:    s_mov_b32 s43, s8
8707; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
8708; GFX1164-DPP-NEXT:    s_mov_b32 s42, s9
8709; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
8710; GFX1164-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
8711; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
8712; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8713; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8714; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v0
8715; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8716; GFX1164-DPP-NEXT:    s_mov_b32 s33, s10
8717; GFX1164-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
8718; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8719; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8720; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8721; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
8722; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
8723; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
8724; GFX1164-DPP-NEXT:    s_mov_b32 s32, 32
8725; GFX1164-DPP-NEXT:    v_mov_b32_e32 v40, v0
8726; GFX1164-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
8727; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8728; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8729; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
8730; GFX1164-DPP-NEXT:    v_mov_b32_e32 v9, 0x7ff80000
8731; GFX1164-DPP-NEXT:    v_mov_b32_e32 v8, 0
8732; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1]
8733; GFX1164-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[0:1]
8734; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
8735; GFX1164-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
8736; GFX1164-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
8737; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8738; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8739; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
8740; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
8741; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8742; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
8743; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
8744; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
8745; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
8746; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8747; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8748; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8749; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8750; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
8751; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
8752; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
8753; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
8754; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8755; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8756; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8757; GFX1164-DPP-NEXT:    v_mov_b32_e32 v11, 0x7ff80000
8758; GFX1164-DPP-NEXT:    v_mov_b32_e32 v10, 0
8759; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
8760; GFX1164-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
8761; GFX1164-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
8762; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8763; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8764; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8765; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8766; GFX1164-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
8767; GFX1164-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
8768; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8769; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8770; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8771; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8772; GFX1164-DPP-NEXT:    v_permlane64_b32 v11, v9
8773; GFX1164-DPP-NEXT:    v_permlane64_b32 v10, v8
8774; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8775; GFX1164-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8776; GFX1164-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8777; GFX1164-DPP-NEXT:    s_mov_b64 exec, s[0:1]
8778; GFX1164-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8779; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
8780; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, v8
8781; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
8782; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, v9
8783; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
8784; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v0
8785; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
8786; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
8787; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v1
8788; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB11_3
8789; GFX1164-DPP-NEXT:  ; %bb.1:
8790; GFX1164-DPP-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
8791; GFX1164-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
8792; GFX1164-DPP-NEXT:    s_mov_b64 s[46:47], 0
8793; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8794; GFX1164-DPP-NEXT:    global_load_b64 v[1:2], v0, s[44:45]
8795; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
8796; GFX1164-DPP-NEXT:    .p2align 6
8797; GFX1164-DPP-NEXT:  .LBB11_2: ; %atomicrmw.start
8798; GFX1164-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
8799; GFX1164-DPP-NEXT:    s_waitcnt vmcnt(0)
8800; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
8801; GFX1164-DPP-NEXT:    s_add_u32 s8, s34, 44
8802; GFX1164-DPP-NEXT:    s_addc_u32 s9, s35, 0
8803; GFX1164-DPP-NEXT:    s_getpc_b64 s[0:1]
8804; GFX1164-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8805; GFX1164-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8806; GFX1164-DPP-NEXT:    v_mov_b32_e32 v31, v40
8807; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8808; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 8
8809; GFX1164-DPP-NEXT:    v_mov_b32_e32 v5, 8
8810; GFX1164-DPP-NEXT:    v_mov_b32_e32 v6, 0
8811; GFX1164-DPP-NEXT:    v_mov_b32_e32 v7, 0
8812; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8813; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8814; GFX1164-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8815; GFX1164-DPP-NEXT:    s_mov_b32 s12, s43
8816; GFX1164-DPP-NEXT:    s_mov_b32 s13, s42
8817; GFX1164-DPP-NEXT:    s_mov_b32 s14, s33
8818; GFX1164-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
8819; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
8820; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 0
8821; GFX1164-DPP-NEXT:    v_mov_b32_e32 v2, s44
8822; GFX1164-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
8823; GFX1164-DPP-NEXT:    v_mov_b32_e32 v3, s45
8824; GFX1164-DPP-NEXT:    v_mov_b32_e32 v4, 0
8825; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8826; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8827; GFX1164-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
8828; GFX1164-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8829; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
8830; GFX1164-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
8831; GFX1164-DPP-NEXT:    s_or_b64 s[46:47], vcc, s[46:47]
8832; GFX1164-DPP-NEXT:    s_and_not1_b64 exec, exec, s[46:47]
8833; GFX1164-DPP-NEXT:    s_cbranch_execnz .LBB11_2
8834; GFX1164-DPP-NEXT:  .LBB11_3:
8835; GFX1164-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
8836; GFX1164-DPP-NEXT:    s_endpgm
8837;
8838; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
8839; GFX1132-DPP:       ; %bb.0:
8840; GFX1132-DPP-NEXT:    s_mov_b64 s[34:35], s[4:5]
8841; GFX1132-DPP-NEXT:    s_mov_b64 s[40:41], s[0:1]
8842; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
8843; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
8844; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
8845; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
8846; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
8847; GFX1132-DPP-NEXT:    v_mov_b32_e32 v31, v0
8848; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8849; GFX1132-DPP-NEXT:    s_mov_b64 s[36:37], s[6:7]
8850; GFX1132-DPP-NEXT:    s_mov_b32 s42, s14
8851; GFX1132-DPP-NEXT:    s_mov_b32 s43, s13
8852; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8853; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
8854; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8855; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
8856; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
8857; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
8858; GFX1132-DPP-NEXT:    s_mov_b32 s32, 32
8859; GFX1132-DPP-NEXT:    s_mov_b32 s33, s15
8860; GFX1132-DPP-NEXT:    v_mov_b32_e32 v40, v0
8861; GFX1132-DPP-NEXT:    s_mov_b64 s[38:39], s[2:3]
8862; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8863; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8864; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
8865; GFX1132-DPP-NEXT:    v_dual_mov_b32 v9, 0x7ff80000 :: v_dual_mov_b32 v8, 0
8866; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0
8867; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s0
8868; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
8869; GFX1132-DPP-NEXT:    v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
8870; GFX1132-DPP-NEXT:    v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
8871; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8872; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8873; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
8874; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[10:11], v[8:9]
8875; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
8876; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8877; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf
8878; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf
8879; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8880; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8881; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8882; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
8883; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8884; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf
8885; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf
8886; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8887; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8888; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8889; GFX1132-DPP-NEXT:    v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0
8890; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8891; GFX1132-DPP-NEXT:    v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf
8892; GFX1132-DPP-NEXT:    v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf
8893; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8894; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8895; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8896; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8897; GFX1132-DPP-NEXT:    v_permlanex16_b32 v11, v9, 0, 0
8898; GFX1132-DPP-NEXT:    v_permlanex16_b32 v10, v8, 0, 0
8899; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8900; GFX1132-DPP-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
8901; GFX1132-DPP-NEXT:    v_max_f64 v[8:9], v[8:9], v[10:11]
8902; GFX1132-DPP-NEXT:    s_mov_b32 exec_lo, s0
8903; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
8904; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
8905; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
8906; GFX1132-DPP-NEXT:    v_mov_b32_e32 v4, v9
8907; GFX1132-DPP-NEXT:    s_mov_b32 s46, 0
8908; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
8909; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_2)
8910; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v1
8911; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB11_3
8912; GFX1132-DPP-NEXT:  ; %bb.1:
8913; GFX1132-DPP-NEXT:    s_load_b64 s[44:45], s[34:35], 0x24
8914; GFX1132-DPP-NEXT:    v_max_f64 v[41:42], v[3:4], v[3:4]
8915; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8916; GFX1132-DPP-NEXT:    global_load_b64 v[1:2], v0, s[44:45]
8917; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x1
8918; GFX1132-DPP-NEXT:    .p2align 6
8919; GFX1132-DPP-NEXT:  .LBB11_2: ; %atomicrmw.start
8920; GFX1132-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
8921; GFX1132-DPP-NEXT:    s_waitcnt vmcnt(0)
8922; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[1:2], v[1:2]
8923; GFX1132-DPP-NEXT:    s_add_u32 s8, s34, 44
8924; GFX1132-DPP-NEXT:    s_addc_u32 s9, s35, 0
8925; GFX1132-DPP-NEXT:    s_getpc_b64 s[0:1]
8926; GFX1132-DPP-NEXT:    s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
8927; GFX1132-DPP-NEXT:    s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
8928; GFX1132-DPP-NEXT:    v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
8929; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
8930; GFX1132-DPP-NEXT:    v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
8931; GFX1132-DPP-NEXT:    v_mov_b32_e32 v7, 0
8932; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[40:41]
8933; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[38:39]
8934; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[36:37]
8935; GFX1132-DPP-NEXT:    s_mov_b32 s12, s43
8936; GFX1132-DPP-NEXT:    s_mov_b32 s13, s42
8937; GFX1132-DPP-NEXT:    s_mov_b32 s14, s33
8938; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_4)
8939; GFX1132-DPP-NEXT:    v_max_f64 v[3:4], v[3:4], v[41:42]
8940; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[1:2], off
8941; GFX1132-DPP-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
8942; GFX1132-DPP-NEXT:    scratch_store_b64 off, v[3:4], off offset:8
8943; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
8944; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
8945; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8946; GFX1132-DPP-NEXT:    scratch_load_b64 v[1:2], off, off
8947; GFX1132-DPP-NEXT:    v_and_b32_e32 v0, 1, v0
8948; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
8949; GFX1132-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
8950; GFX1132-DPP-NEXT:    s_or_b32 s46, vcc_lo, s46
8951; GFX1132-DPP-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s46
8952; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB11_2
8953; GFX1132-DPP-NEXT:  .LBB11_3:
8954; GFX1132-DPP-NEXT:    s_set_inst_prefetch_distance 0x2
8955; GFX1132-DPP-NEXT:    s_endpgm
8956  %divValue = call double @div.double.value()
8957  %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue monotonic, align 4, !amdgpu.no.fine.grained.memory !1
8958  ret void
8959}
8960
8961define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
8962; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
8963; GFX7LESS:       ; %bb.0:
8964; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
8965; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
8966; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
8967; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
8968; GFX7LESS-NEXT:    s_cbranch_execz .LBB12_3
8969; GFX7LESS-NEXT:  ; %bb.1:
8970; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
8971; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
8972; GFX7LESS-NEXT:    s_load_dword s2, s[0:1], 0x0
8973; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
8974; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
8975; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
8976; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s2
8977; GFX7LESS-NEXT:    s_mov_b32 s2, -1
8978; GFX7LESS-NEXT:  .LBB12_2: ; %atomicrmw.start
8979; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
8980; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
8981; GFX7LESS-NEXT:    v_max_f32_e32 v0, 4.0, v0
8982; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
8983; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v1
8984; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v0
8985; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
8986; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
8987; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
8988; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8989; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v2
8990; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8991; GFX7LESS-NEXT:    s_cbranch_execnz .LBB12_2
8992; GFX7LESS-NEXT:  .LBB12_3:
8993; GFX7LESS-NEXT:    s_endpgm
8994;
8995; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
8996; GFX9:       ; %bb.0:
8997; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
8998; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
8999; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9000; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9001; GFX9-NEXT:    s_cbranch_execz .LBB12_3
9002; GFX9-NEXT:  ; %bb.1:
9003; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9004; GFX9-NEXT:    s_mov_b64 s[2:3], 0
9005; GFX9-NEXT:    v_mov_b32_e32 v2, 0
9006; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
9007; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x0
9008; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
9009; GFX9-NEXT:    v_mov_b32_e32 v1, s4
9010; GFX9-NEXT:  .LBB12_2: ; %atomicrmw.start
9011; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
9012; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
9013; GFX9-NEXT:    v_max_f32_e32 v0, 4.0, v0
9014; GFX9-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
9015; GFX9-NEXT:    s_waitcnt vmcnt(0)
9016; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
9017; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
9018; GFX9-NEXT:    v_mov_b32_e32 v1, v0
9019; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
9020; GFX9-NEXT:    s_cbranch_execnz .LBB12_2
9021; GFX9-NEXT:  .LBB12_3:
9022; GFX9-NEXT:    s_endpgm
9023;
9024; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9025; GFX1064:       ; %bb.0:
9026; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9027; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9028; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9029; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9030; GFX1064-NEXT:    s_cbranch_execz .LBB12_2
9031; GFX1064-NEXT:  ; %bb.1:
9032; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9033; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
9034; GFX1064-NEXT:    v_mov_b32_e32 v1, 4.0
9035; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
9036; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9037; GFX1064-NEXT:  .LBB12_2:
9038; GFX1064-NEXT:    s_endpgm
9039;
9040; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9041; GFX1032:       ; %bb.0:
9042; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9043; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
9044; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
9045; GFX1032-NEXT:    s_cbranch_execz .LBB12_2
9046; GFX1032-NEXT:  ; %bb.1:
9047; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9048; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
9049; GFX1032-NEXT:    v_mov_b32_e32 v1, 4.0
9050; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
9051; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9052; GFX1032-NEXT:  .LBB12_2:
9053; GFX1032-NEXT:    s_endpgm
9054;
9055; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9056; GFX1164:       ; %bb.0:
9057; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9058; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
9059; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9060; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9061; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
9062; GFX1164-NEXT:    s_cbranch_execz .LBB12_2
9063; GFX1164-NEXT:  ; %bb.1:
9064; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9065; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
9066; GFX1164-NEXT:    v_mov_b32_e32 v1, 4.0
9067; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
9068; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9069; GFX1164-NEXT:  .LBB12_2:
9070; GFX1164-NEXT:    s_endpgm
9071;
9072; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9073; GFX1132:       ; %bb.0:
9074; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9075; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
9076; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9077; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
9078; GFX1132-NEXT:    s_cbranch_execz .LBB12_2
9079; GFX1132-NEXT:  ; %bb.1:
9080; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9081; GFX1132-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
9082; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
9083; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9084; GFX1132-NEXT:  .LBB12_2:
9085; GFX1132-NEXT:    s_endpgm
9086;
9087; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9088; GFX7LESS-DPP:       ; %bb.0:
9089; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
9090; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
9091; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9092; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9093; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB12_3
9094; GFX7LESS-DPP-NEXT:  ; %bb.1:
9095; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
9096; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9097; GFX7LESS-DPP-NEXT:    s_load_dword s2, s[0:1], 0x0
9098; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
9099; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
9100; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9101; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s2
9102; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
9103; GFX7LESS-DPP-NEXT:  .LBB12_2: ; %atomicrmw.start
9104; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
9105; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
9106; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
9107; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
9108; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v1
9109; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v0
9110; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
9111; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
9112; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
9113; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
9114; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v2
9115; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
9116; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB12_2
9117; GFX7LESS-DPP-NEXT:  .LBB12_3:
9118; GFX7LESS-DPP-NEXT:    s_endpgm
9119;
9120; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9121; GFX9-DPP:       ; %bb.0:
9122; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9123; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9124; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9125; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9126; GFX9-DPP-NEXT:    s_cbranch_execz .LBB12_3
9127; GFX9-DPP-NEXT:  ; %bb.1:
9128; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9129; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
9130; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
9131; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9132; GFX9-DPP-NEXT:    s_load_dword s4, s[0:1], 0x0
9133; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9134; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s4
9135; GFX9-DPP-NEXT:  .LBB12_2: ; %atomicrmw.start
9136; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
9137; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
9138; GFX9-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
9139; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
9140; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
9141; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
9142; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
9143; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
9144; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
9145; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB12_2
9146; GFX9-DPP-NEXT:  .LBB12_3:
9147; GFX9-DPP-NEXT:    s_endpgm
9148;
9149; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9150; GFX1064-DPP:       ; %bb.0:
9151; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9152; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9153; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9154; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9155; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB12_2
9156; GFX1064-DPP-NEXT:  ; %bb.1:
9157; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9158; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
9159; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9160; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9161; GFX1064-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9162; GFX1064-DPP-NEXT:  .LBB12_2:
9163; GFX1064-DPP-NEXT:    s_endpgm
9164;
9165; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9166; GFX1032-DPP:       ; %bb.0:
9167; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9168; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
9169; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
9170; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB12_2
9171; GFX1032-DPP-NEXT:  ; %bb.1:
9172; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9173; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
9174; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9175; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9176; GFX1032-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9177; GFX1032-DPP-NEXT:  .LBB12_2:
9178; GFX1032-DPP-NEXT:    s_endpgm
9179;
9180; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9181; GFX1164-DPP:       ; %bb.0:
9182; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9183; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
9184; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9185; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9186; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
9187; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB12_2
9188; GFX1164-DPP-NEXT:  ; %bb.1:
9189; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9190; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
9191; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9192; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9193; GFX1164-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9194; GFX1164-DPP-NEXT:  .LBB12_2:
9195; GFX1164-DPP-NEXT:    s_endpgm
9196;
9197; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9198; GFX1132-DPP:       ; %bb.0:
9199; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9200; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
9201; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9202; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
9203; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB12_2
9204; GFX1132-DPP-NEXT:  ; %bb.1:
9205; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9206; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
9207; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9208; GFX1132-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9209; GFX1132-DPP-NEXT:  .LBB12_2:
9210; GFX1132-DPP-NEXT:    s_endpgm
9211  %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0  monotonic, align 4, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1, !amdgpu.ignore.denormal.mode !1
9212  ret void
9213}
9214
; Kernel under test: a single `atomicrmw fmax` of the constant 4.0 on the
; global (addrspace(1)) pointer argument, with monotonic ordering and no
; syncscope operand. The instruction carries !amdgpu.no.fine.grained.memory and
; !amdgpu.no.remote.memory, but not !amdgpu.ignore.denormal.mode.
;
; What the autogenerated assertions below expect for each configuration
;   * all of them compare the v_mbcnt result against 0 and branch around the
;     atomic with s_cbranch_execz, so it is issued only where that compare holds;
;   * the GFX7LESS and GFX9 outputs (Iterative and DPP strategies alike) use a
;     compare-and-swap retry loop labelled %atomicrmw.start;
;   * the gfx1010 outputs use global_atomic_fmax and the gfx1100 outputs use
;     global_atomic_max_f32, with no retry loop.
; The assertion lines are tool-generated (see the note at the top of the file),
; so regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
9215define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
9216; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9217; GFX7LESS:       ; %bb.0:
9218; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
9219; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
9220; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9221; GFX7LESS-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9222; GFX7LESS-NEXT:    s_cbranch_execz .LBB13_3
9223; GFX7LESS-NEXT:  ; %bb.1:
9224; GFX7LESS-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
9225; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
9226; GFX7LESS-NEXT:    s_load_dword s2, s[0:1], 0x0
9227; GFX7LESS-NEXT:    s_mov_b64 s[4:5], 0
9228; GFX7LESS-NEXT:    s_mov_b32 s3, 0xf000
9229; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
9230; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s2
9231; GFX7LESS-NEXT:    s_mov_b32 s2, -1
9232; GFX7LESS-NEXT:  .LBB13_2: ; %atomicrmw.start
9233; GFX7LESS-NEXT:    ; =>This Inner Loop Header: Depth=1
9234; GFX7LESS-NEXT:    v_mul_f32_e32 v0, 1.0, v1
9235; GFX7LESS-NEXT:    v_max_f32_e32 v0, 4.0, v0
9236; GFX7LESS-NEXT:    s_waitcnt expcnt(0)
9237; GFX7LESS-NEXT:    v_mov_b32_e32 v3, v1
9238; GFX7LESS-NEXT:    v_mov_b32_e32 v2, v0
9239; GFX7LESS-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
9240; GFX7LESS-NEXT:    s_waitcnt vmcnt(0)
9241; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
9242; GFX7LESS-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
9243; GFX7LESS-NEXT:    v_mov_b32_e32 v1, v2
9244; GFX7LESS-NEXT:    s_andn2_b64 exec, exec, s[4:5]
9245; GFX7LESS-NEXT:    s_cbranch_execnz .LBB13_2
9246; GFX7LESS-NEXT:  .LBB13_3:
9247; GFX7LESS-NEXT:    s_endpgm
9248;
9249; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9250; GFX9:       ; %bb.0:
9251; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9252; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9253; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9254; GFX9-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9255; GFX9-NEXT:    s_cbranch_execz .LBB13_3
9256; GFX9-NEXT:  ; %bb.1:
9257; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9258; GFX9-NEXT:    s_mov_b64 s[2:3], 0
9259; GFX9-NEXT:    v_mov_b32_e32 v2, 0
9260; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
9261; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x0
9262; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
9263; GFX9-NEXT:    v_mov_b32_e32 v1, s4
9264; GFX9-NEXT:  .LBB13_2: ; %atomicrmw.start
9265; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
9266; GFX9-NEXT:    v_max_f32_e32 v0, v1, v1
9267; GFX9-NEXT:    v_max_f32_e32 v0, 4.0, v0
9268; GFX9-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
9269; GFX9-NEXT:    s_waitcnt vmcnt(0)
9270; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
9271; GFX9-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
9272; GFX9-NEXT:    v_mov_b32_e32 v1, v0
9273; GFX9-NEXT:    s_andn2_b64 exec, exec, s[2:3]
9274; GFX9-NEXT:    s_cbranch_execnz .LBB13_2
9275; GFX9-NEXT:  .LBB13_3:
9276; GFX9-NEXT:    s_endpgm
9277;
9278; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9279; GFX1064:       ; %bb.0:
9280; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9281; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9282; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9283; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9284; GFX1064-NEXT:    s_cbranch_execz .LBB13_2
9285; GFX1064-NEXT:  ; %bb.1:
9286; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9287; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
9288; GFX1064-NEXT:    v_mov_b32_e32 v1, 4.0
9289; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
9290; GFX1064-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9291; GFX1064-NEXT:  .LBB13_2:
9292; GFX1064-NEXT:    s_endpgm
9293;
9294; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9295; GFX1032:       ; %bb.0:
9296; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9297; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
9298; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
9299; GFX1032-NEXT:    s_cbranch_execz .LBB13_2
9300; GFX1032-NEXT:  ; %bb.1:
9301; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9302; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
9303; GFX1032-NEXT:    v_mov_b32_e32 v1, 4.0
9304; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
9305; GFX1032-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9306; GFX1032-NEXT:  .LBB13_2:
9307; GFX1032-NEXT:    s_endpgm
9308;
9309; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9310; GFX1164:       ; %bb.0:
9311; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9312; GFX1164-NEXT:    s_mov_b64 s[0:1], exec
9313; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9314; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9315; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
9316; GFX1164-NEXT:    s_cbranch_execz .LBB13_2
9317; GFX1164-NEXT:  ; %bb.1:
9318; GFX1164-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9319; GFX1164-NEXT:    v_mov_b32_e32 v0, 0
9320; GFX1164-NEXT:    v_mov_b32_e32 v1, 4.0
9321; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
9322; GFX1164-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9323; GFX1164-NEXT:  .LBB13_2:
9324; GFX1164-NEXT:    s_endpgm
9325;
9326; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9327; GFX1132:       ; %bb.0:
9328; GFX1132-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9329; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
9330; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9331; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
9332; GFX1132-NEXT:    s_cbranch_execz .LBB13_2
9333; GFX1132-NEXT:  ; %bb.1:
9334; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9335; GFX1132-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
9336; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
9337; GFX1132-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9338; GFX1132-NEXT:  .LBB13_2:
9339; GFX1132-NEXT:    s_endpgm
9340;
9341; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9342; GFX7LESS-DPP:       ; %bb.0:
9343; GFX7LESS-DPP-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
9344; GFX7LESS-DPP-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
9345; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9346; GFX7LESS-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9347; GFX7LESS-DPP-NEXT:    s_cbranch_execz .LBB13_3
9348; GFX7LESS-DPP-NEXT:  ; %bb.1:
9349; GFX7LESS-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
9350; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9351; GFX7LESS-DPP-NEXT:    s_load_dword s2, s[0:1], 0x0
9352; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], 0
9353; GFX7LESS-DPP-NEXT:    s_mov_b32 s3, 0xf000
9354; GFX7LESS-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9355; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, s2
9356; GFX7LESS-DPP-NEXT:    s_mov_b32 s2, -1
9357; GFX7LESS-DPP-NEXT:  .LBB13_2: ; %atomicrmw.start
9358; GFX7LESS-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
9359; GFX7LESS-DPP-NEXT:    v_mul_f32_e32 v0, 1.0, v1
9360; GFX7LESS-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
9361; GFX7LESS-DPP-NEXT:    s_waitcnt expcnt(0)
9362; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v3, v1
9363; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v2, v0
9364; GFX7LESS-DPP-NEXT:    buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc
9365; GFX7LESS-DPP-NEXT:    s_waitcnt vmcnt(0)
9366; GFX7LESS-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
9367; GFX7LESS-DPP-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
9368; GFX7LESS-DPP-NEXT:    v_mov_b32_e32 v1, v2
9369; GFX7LESS-DPP-NEXT:    s_andn2_b64 exec, exec, s[4:5]
9370; GFX7LESS-DPP-NEXT:    s_cbranch_execnz .LBB13_2
9371; GFX7LESS-DPP-NEXT:  .LBB13_3:
9372; GFX7LESS-DPP-NEXT:    s_endpgm
9373;
9374; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9375; GFX9-DPP:       ; %bb.0:
9376; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9377; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9378; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9379; GFX9-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9380; GFX9-DPP-NEXT:    s_cbranch_execz .LBB13_3
9381; GFX9-DPP-NEXT:  ; %bb.1:
9382; GFX9-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9383; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], 0
9384; GFX9-DPP-NEXT:    v_mov_b32_e32 v2, 0
9385; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9386; GFX9-DPP-NEXT:    s_load_dword s4, s[0:1], 0x0
9387; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9388; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, s4
9389; GFX9-DPP-NEXT:  .LBB13_2: ; %atomicrmw.start
9390; GFX9-DPP-NEXT:    ; =>This Inner Loop Header: Depth=1
9391; GFX9-DPP-NEXT:    v_max_f32_e32 v0, v1, v1
9392; GFX9-DPP-NEXT:    v_max_f32_e32 v0, 4.0, v0
9393; GFX9-DPP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
9394; GFX9-DPP-NEXT:    s_waitcnt vmcnt(0)
9395; GFX9-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
9396; GFX9-DPP-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
9397; GFX9-DPP-NEXT:    v_mov_b32_e32 v1, v0
9398; GFX9-DPP-NEXT:    s_andn2_b64 exec, exec, s[2:3]
9399; GFX9-DPP-NEXT:    s_cbranch_execnz .LBB13_2
9400; GFX9-DPP-NEXT:  .LBB13_3:
9401; GFX9-DPP-NEXT:    s_endpgm
9402;
9403; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9404; GFX1064-DPP:       ; %bb.0:
9405; GFX1064-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9406; GFX1064-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9407; GFX1064-DPP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
9408; GFX1064-DPP-NEXT:    s_and_saveexec_b64 s[0:1], vcc
9409; GFX1064-DPP-NEXT:    s_cbranch_execz .LBB13_2
9410; GFX1064-DPP-NEXT:  ; %bb.1:
9411; GFX1064-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9412; GFX1064-DPP-NEXT:    v_mov_b32_e32 v0, 0
9413; GFX1064-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9414; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9415; GFX1064-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9416; GFX1064-DPP-NEXT:  .LBB13_2:
9417; GFX1064-DPP-NEXT:    s_endpgm
9418;
9419; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9420; GFX1032-DPP:       ; %bb.0:
9421; GFX1032-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9422; GFX1032-DPP-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
9423; GFX1032-DPP-NEXT:    s_and_saveexec_b32 s0, vcc_lo
9424; GFX1032-DPP-NEXT:    s_cbranch_execz .LBB13_2
9425; GFX1032-DPP-NEXT:  ; %bb.1:
9426; GFX1032-DPP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9427; GFX1032-DPP-NEXT:    v_mov_b32_e32 v0, 0
9428; GFX1032-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9429; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9430; GFX1032-DPP-NEXT:    global_atomic_fmax v0, v1, s[0:1]
9431; GFX1032-DPP-NEXT:  .LBB13_2:
9432; GFX1032-DPP-NEXT:    s_endpgm
9433;
9434; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9435; GFX1164-DPP:       ; %bb.0:
9436; GFX1164-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9437; GFX1164-DPP-NEXT:    s_mov_b64 s[0:1], exec
9438; GFX1164-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9439; GFX1164-DPP-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
9440; GFX1164-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
9441; GFX1164-DPP-NEXT:    s_cbranch_execz .LBB13_2
9442; GFX1164-DPP-NEXT:  ; %bb.1:
9443; GFX1164-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9444; GFX1164-DPP-NEXT:    v_mov_b32_e32 v0, 0
9445; GFX1164-DPP-NEXT:    v_mov_b32_e32 v1, 4.0
9446; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9447; GFX1164-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9448; GFX1164-DPP-NEXT:  .LBB13_2:
9449; GFX1164-DPP-NEXT:    s_endpgm
9450;
9451; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
9452; GFX1132-DPP:       ; %bb.0:
9453; GFX1132-DPP-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
9454; GFX1132-DPP-NEXT:    s_mov_b32 s0, exec_lo
9455; GFX1132-DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9456; GFX1132-DPP-NEXT:    v_cmpx_eq_u32_e32 0, v0
9457; GFX1132-DPP-NEXT:    s_cbranch_execz .LBB13_2
9458; GFX1132-DPP-NEXT:  ; %bb.1:
9459; GFX1132-DPP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
9460; GFX1132-DPP-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
9461; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
9462; GFX1132-DPP-NEXT:    global_atomic_max_f32 v0, v1, s[0:1]
9463; GFX1132-DPP-NEXT:  .LBB13_2:
9464; GFX1132-DPP-NEXT:    s_endpgm
  ; The old value returned in %result is never read; the kernel just returns.
9465  %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0  monotonic, align 4, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1
9466  ret void
9467}
9468
; Attribute group #0 sets "denormal-fp-math-f32" to preserve-sign for both of
; its comma-separated components.
; NOTE(review): no function in this part of the file references #0 -- confirm
; which definitions earlier in the file use it.
9469attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
9470
; Module flags: the list holds the single entry !0, which sets
; "amdhsa_code_object_version" to 500 (the leading i32 1 is the flag's
; merge-behavior field).
9471!llvm.module.flags = !{!0}
9472!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
; Empty metadata node; it is the operand of the !amdgpu.no.fine.grained.memory,
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode annotations on the
; atomicrmw instructions in this file.
9473!1 = !{}
9474
9475