1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s 8; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7LESS-DPP %s 9; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DPP %s 10; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064-DPP %s 11; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032-DPP %s 12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=DPP 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164-DPP %s 13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132-DPP %s 14 ; External helpers declared without a body. The expected code of the div_value kernel loads the address of div.float.value through the GOT and calls it with s_swappc_b64. NOTE(review): @div.double.value is declared as returning float despite its name - confirm against its call sites, which are outside this view. 15declare float @div.float.value() 16declare float @div.double.value() 17 ; Kernel under test: performs a single agent-scope, monotonic atomicrmw fmax of the constant 4.0 on its addrspace(1) pointer argument; the returned value is never used. Every CHECK prefix below expects the atomic to be guarded by a v_mbcnt / compare-with-zero / exec-mask sequence. GFX7LESS and GFX9 (both strategies) expect a cmpswap retry loop, the GFX10 prefixes expect global_atomic_fmax, and the GFX11 prefixes expect global_atomic_max_f32. The CHECK lines are autogenerated (see the NOTE on the first line of the file); regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 18define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 { 19; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 20; GFX7LESS: ; %bb.0: 21; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 22; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 23; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 24; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 25; GFX7LESS-NEXT: s_cbranch_execz .LBB0_3 26; GFX7LESS-NEXT: ; %bb.1: 27; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 28; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 29; GFX7LESS-NEXT: s_load_dword s2, s[0:1], 0x0 30; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 31; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 32; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 33; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 34; GFX7LESS-NEXT: s_mov_b32 s2, -1 35; GFX7LESS-NEXT: .LBB0_2: ; %atomicrmw.start 36; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 37; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 38; GFX7LESS-NEXT: v_max_f32_e32 v0, 4.0, v0 39; GFX7LESS-NEXT: s_waitcnt expcnt(0) 40; GFX7LESS-NEXT: v_mov_b32_e32 v3, v1 41; GFX7LESS-NEXT: v_mov_b32_e32 v2, v0 42; GFX7LESS-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 43; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 44; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 45; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 46; GFX7LESS-NEXT: v_mov_b32_e32 v1, v2 47; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 48; GFX7LESS-NEXT: s_cbranch_execnz .LBB0_2 49; GFX7LESS-NEXT: .LBB0_3: 50; GFX7LESS-NEXT: s_endpgm 51; 52; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 53; GFX9: ; 
%bb.0: 54; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 55; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 56; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 57; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 58; GFX9-NEXT: s_cbranch_execz .LBB0_3 59; GFX9-NEXT: ; %bb.1: 60; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 61; GFX9-NEXT: s_mov_b64 s[2:3], 0 62; GFX9-NEXT: v_mov_b32_e32 v2, 0 63; GFX9-NEXT: s_waitcnt lgkmcnt(0) 64; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 65; GFX9-NEXT: s_waitcnt lgkmcnt(0) 66; GFX9-NEXT: v_mov_b32_e32 v1, s4 67; GFX9-NEXT: .LBB0_2: ; %atomicrmw.start 68; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 69; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 70; GFX9-NEXT: v_max_f32_e32 v0, 4.0, v0 71; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 72; GFX9-NEXT: s_waitcnt vmcnt(0) 73; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 74; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 75; GFX9-NEXT: v_mov_b32_e32 v1, v0 76; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 77; GFX9-NEXT: s_cbranch_execnz .LBB0_2 78; GFX9-NEXT: .LBB0_3: 79; GFX9-NEXT: s_endpgm 80; 81; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 82; GFX1064: ; %bb.0: 83; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 84; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 85; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 86; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 87; GFX1064-NEXT: s_cbranch_execz .LBB0_2 88; GFX1064-NEXT: ; %bb.1: 89; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 90; GFX1064-NEXT: v_mov_b32_e32 v0, 0 91; GFX1064-NEXT: v_mov_b32_e32 v1, 4.0 92; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 93; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 94; GFX1064-NEXT: .LBB0_2: 95; GFX1064-NEXT: s_endpgm 96; 97; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 98; GFX1032: ; %bb.0: 99; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 100; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 101; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 102; GFX1032-NEXT: 
s_cbranch_execz .LBB0_2 103; GFX1032-NEXT: ; %bb.1: 104; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 105; GFX1032-NEXT: v_mov_b32_e32 v0, 0 106; GFX1032-NEXT: v_mov_b32_e32 v1, 4.0 107; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 108; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 109; GFX1032-NEXT: .LBB0_2: 110; GFX1032-NEXT: s_endpgm 111; 112; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 113; GFX1164: ; %bb.0: 114; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 115; GFX1164-NEXT: s_mov_b64 s[0:1], exec 116; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 117; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 118; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 119; GFX1164-NEXT: s_cbranch_execz .LBB0_2 120; GFX1164-NEXT: ; %bb.1: 121; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 122; GFX1164-NEXT: v_mov_b32_e32 v0, 0 123; GFX1164-NEXT: v_mov_b32_e32 v1, 4.0 124; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 125; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 126; GFX1164-NEXT: .LBB0_2: 127; GFX1164-NEXT: s_endpgm 128; 129; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 130; GFX1132: ; %bb.0: 131; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 132; GFX1132-NEXT: s_mov_b32 s0, exec_lo 133; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 134; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 135; GFX1132-NEXT: s_cbranch_execz .LBB0_2 136; GFX1132-NEXT: ; %bb.1: 137; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 138; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 139; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 140; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 141; GFX1132-NEXT: .LBB0_2: 142; GFX1132-NEXT: s_endpgm 143; 144; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 145; GFX7LESS-DPP: ; %bb.0: 146; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 147; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 148; GFX7LESS-DPP-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v0 149; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 150; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB0_3 151; GFX7LESS-DPP-NEXT: ; %bb.1: 152; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 153; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 154; GFX7LESS-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 155; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 156; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 157; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 158; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s2 159; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 160; GFX7LESS-DPP-NEXT: .LBB0_2: ; %atomicrmw.start 161; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 162; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 163; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 164; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 165; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 166; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v0 167; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 168; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 169; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 170; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 171; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v2 172; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 173; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB0_2 174; GFX7LESS-DPP-NEXT: .LBB0_3: 175; GFX7LESS-DPP-NEXT: s_endpgm 176; 177; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 178; GFX9-DPP: ; %bb.0: 179; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 180; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 181; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 182; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 183; GFX9-DPP-NEXT: s_cbranch_execz .LBB0_3 184; GFX9-DPP-NEXT: ; %bb.1: 185; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 186; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 187; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 188; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 189; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 190; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 191; 
GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 192; GFX9-DPP-NEXT: .LBB0_2: ; %atomicrmw.start 193; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 194; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 195; GFX9-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 196; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 197; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 198; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 199; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 200; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 201; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 202; GFX9-DPP-NEXT: s_cbranch_execnz .LBB0_2 203; GFX9-DPP-NEXT: .LBB0_3: 204; GFX9-DPP-NEXT: s_endpgm 205; 206; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 207; GFX1064-DPP: ; %bb.0: 208; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 209; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 210; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 211; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 212; GFX1064-DPP-NEXT: s_cbranch_execz .LBB0_2 213; GFX1064-DPP-NEXT: ; %bb.1: 214; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 215; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 216; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 4.0 217; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 218; GFX1064-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 219; GFX1064-DPP-NEXT: .LBB0_2: 220; GFX1064-DPP-NEXT: s_endpgm 221; 222; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 223; GFX1032-DPP: ; %bb.0: 224; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 225; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 226; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 227; GFX1032-DPP-NEXT: s_cbranch_execz .LBB0_2 228; GFX1032-DPP-NEXT: ; %bb.1: 229; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 230; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 231; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 4.0 232; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 233; GFX1032-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 234; GFX1032-DPP-NEXT: .LBB0_2: 
235; GFX1032-DPP-NEXT: s_endpgm 236; 237; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 238; GFX1164-DPP: ; %bb.0: 239; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 240; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 241; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 242; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 243; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 244; GFX1164-DPP-NEXT: s_cbranch_execz .LBB0_2 245; GFX1164-DPP-NEXT: ; %bb.1: 246; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 247; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 248; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 4.0 249; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 250; GFX1164-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 251; GFX1164-DPP-NEXT: .LBB0_2: 252; GFX1164-DPP-NEXT: s_endpgm 253; 254; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe: 255; GFX1132-DPP: ; %bb.0: 256; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 257; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 258; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 259; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 260; GFX1132-DPP-NEXT: s_cbranch_execz .LBB0_2 261; GFX1132-DPP-NEXT: ; %bb.1: 262; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 263; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 264; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 265; GFX1132-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 266; GFX1132-DPP-NEXT: .LBB0_2: 267; GFX1132-DPP-NEXT: s_endpgm 268 %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1 269 ret void 270} 271 272define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 { 273; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 274; GFX7LESS: ; %bb.0: 275; GFX7LESS-NEXT: s_mov_b32 s32, 0 276; GFX7LESS-NEXT: s_mov_b32 s36, 
SCRATCH_RSRC_DWORD0 277; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 278; GFX7LESS-NEXT: s_mov_b32 s38, -1 279; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 280; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 281; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 282; GFX7LESS-NEXT: s_mov_b32 s14, s10 283; GFX7LESS-NEXT: s_mov_b32 s13, s9 284; GFX7LESS-NEXT: s_mov_b32 s12, s8 285; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[6:7] 286; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5] 287; GFX7LESS-NEXT: s_add_u32 s8, s34, 44 288; GFX7LESS-NEXT: s_addc_u32 s9, s35, 0 289; GFX7LESS-NEXT: s_getpc_b64 s[4:5] 290; GFX7LESS-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 291; GFX7LESS-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 292; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 293; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 294; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 295; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 296; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 297; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] 298; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 299; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] 300; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] 301; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 302; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 303; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 304; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0x7fc00000 305; GFX7LESS-NEXT: .LBB1_1: ; %ComputeLoop 306; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 307; GFX7LESS-NEXT: s_ff1_i32_b64 s2, s[0:1] 308; GFX7LESS-NEXT: v_mul_f32_e32 v1, 1.0, v2 309; GFX7LESS-NEXT: v_readlane_b32 s4, v0, s2 310; GFX7LESS-NEXT: s_lshl_b64 s[2:3], 1, s2 311; GFX7LESS-NEXT: v_mul_f32_e64 v2, 1.0, s4 312; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 313; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 314; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 315; GFX7LESS-NEXT: v_max_f32_e32 v2, v1, v2 316; GFX7LESS-NEXT: s_cbranch_vccnz .LBB1_1 317; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 318; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, 
exec_lo, 0 319; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 320; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 321; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 322; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 323; GFX7LESS-NEXT: s_cbranch_execz .LBB1_5 324; GFX7LESS-NEXT: ; %bb.3: 325; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x9 326; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 327; GFX7LESS-NEXT: s_mov_b32 s2, -1 328; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 329; GFX7LESS-NEXT: buffer_load_dword v1, off, s[0:3], 0 330; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 331; GFX7LESS-NEXT: v_mul_f32_e32 v2, 1.0, v2 332; GFX7LESS-NEXT: .LBB1_4: ; %atomicrmw.start 333; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 334; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 335; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 336; GFX7LESS-NEXT: v_max_f32_e32 v0, v0, v2 337; GFX7LESS-NEXT: s_waitcnt expcnt(0) 338; GFX7LESS-NEXT: v_mov_b32_e32 v4, v1 339; GFX7LESS-NEXT: v_mov_b32_e32 v3, v0 340; GFX7LESS-NEXT: buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc 341; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 342; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 343; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 344; GFX7LESS-NEXT: v_mov_b32_e32 v1, v3 345; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 346; GFX7LESS-NEXT: s_cbranch_execnz .LBB1_4 347; GFX7LESS-NEXT: .LBB1_5: 348; GFX7LESS-NEXT: s_endpgm 349; 350; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 351; GFX9: ; %bb.0: 352; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 353; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 354; GFX9-NEXT: s_mov_b32 s38, -1 355; GFX9-NEXT: s_mov_b32 s39, 0xe00000 356; GFX9-NEXT: s_add_u32 s36, s36, s11 357; GFX9-NEXT: s_addc_u32 s37, s37, 0 358; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] 359; GFX9-NEXT: s_mov_b32 s12, s8 360; GFX9-NEXT: s_add_u32 s8, s34, 44 361; GFX9-NEXT: s_mov_b32 s13, s9 362; GFX9-NEXT: s_addc_u32 s9, s35, 0 363; GFX9-NEXT: s_getpc_b64 s[4:5] 364; GFX9-NEXT: s_add_u32 s4, s4, 
div.float.value@gotpcrel32@lo+4 365; GFX9-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 366; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 367; GFX9-NEXT: s_mov_b32 s14, s10 368; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] 369; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 370; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 371; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] 372; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 373; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 374; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 375; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 376; GFX9-NEXT: s_mov_b32 s32, 0 377; GFX9-NEXT: s_waitcnt lgkmcnt(0) 378; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 379; GFX9-NEXT: s_mov_b64 s[0:1], exec 380; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 381; GFX9-NEXT: .LBB1_1: ; %ComputeLoop 382; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 383; GFX9-NEXT: s_ff1_i32_b64 s2, s[0:1] 384; GFX9-NEXT: v_readlane_b32 s4, v0, s2 385; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s2 386; GFX9-NEXT: v_max_f32_e32 v1, v2, v2 387; GFX9-NEXT: v_max_f32_e64 v2, s4, s4 388; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 389; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 390; GFX9-NEXT: v_max_f32_e32 v2, v1, v2 391; GFX9-NEXT: s_cbranch_scc1 .LBB1_1 392; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 393; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 394; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 395; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 396; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 397; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 398; GFX9-NEXT: s_cbranch_execz .LBB1_5 399; GFX9-NEXT: ; %bb.3: 400; GFX9-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 401; GFX9-NEXT: v_mov_b32_e32 v3, 0 402; GFX9-NEXT: s_mov_b64 s[2:3], 0 403; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 404; GFX9-NEXT: s_waitcnt lgkmcnt(0) 405; GFX9-NEXT: global_load_dword v1, v3, s[0:1] 406; GFX9-NEXT: .LBB1_4: ; %atomicrmw.start 407; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 408; GFX9-NEXT: s_waitcnt vmcnt(0) 409; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 410; GFX9-NEXT: v_max_f32_e32 
v0, v0, v2 411; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc 412; GFX9-NEXT: s_waitcnt vmcnt(0) 413; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 414; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 415; GFX9-NEXT: v_mov_b32_e32 v1, v0 416; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 417; GFX9-NEXT: s_cbranch_execnz .LBB1_4 418; GFX9-NEXT: .LBB1_5: 419; GFX9-NEXT: s_endpgm 420; 421; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 422; GFX1064: ; %bb.0: 423; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 424; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 425; GFX1064-NEXT: s_mov_b32 s38, -1 426; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 427; GFX1064-NEXT: s_add_u32 s36, s36, s11 428; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 429; GFX1064-NEXT: s_addc_u32 s37, s37, 0 430; GFX1064-NEXT: s_mov_b32 s12, s8 431; GFX1064-NEXT: s_add_u32 s8, s34, 44 432; GFX1064-NEXT: s_mov_b32 s13, s9 433; GFX1064-NEXT: s_addc_u32 s9, s35, 0 434; GFX1064-NEXT: s_getpc_b64 s[4:5] 435; GFX1064-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 436; GFX1064-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 437; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 438; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 439; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 440; GFX1064-NEXT: s_mov_b32 s14, s10 441; GFX1064-NEXT: s_mov_b64 s[10:11], s[6:7] 442; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] 443; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 444; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 445; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] 446; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] 447; GFX1064-NEXT: s_mov_b32 s32, 0 448; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 449; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 450; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 451; GFX1064-NEXT: s_mov_b64 s[0:1], exec 452; GFX1064-NEXT: .LBB1_1: ; %ComputeLoop 453; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 454; GFX1064-NEXT: s_ff1_i32_b64 s2, s[0:1] 455; GFX1064-NEXT: v_max_f32_e32 v1, 
v1, v1 456; GFX1064-NEXT: v_readlane_b32 s3, v0, s2 457; GFX1064-NEXT: v_max_f32_e64 v2, s3, s3 458; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s2 459; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 460; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 461; GFX1064-NEXT: v_max_f32_e32 v1, v1, v2 462; GFX1064-NEXT: s_cbranch_scc1 .LBB1_1 463; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 464; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 465; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 466; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 467; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 468; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 469; GFX1064-NEXT: s_cbranch_execz .LBB1_4 470; GFX1064-NEXT: ; %bb.3: 471; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 472; GFX1064-NEXT: v_mov_b32_e32 v0, 0 473; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 474; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 475; GFX1064-NEXT: .LBB1_4: 476; GFX1064-NEXT: s_endpgm 477; 478; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 479; GFX1032: ; %bb.0: 480; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 481; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 482; GFX1032-NEXT: s_mov_b32 s38, -1 483; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 484; GFX1032-NEXT: s_add_u32 s36, s36, s11 485; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 486; GFX1032-NEXT: s_addc_u32 s37, s37, 0 487; GFX1032-NEXT: s_mov_b32 s12, s8 488; GFX1032-NEXT: s_add_u32 s8, s34, 44 489; GFX1032-NEXT: s_mov_b32 s13, s9 490; GFX1032-NEXT: s_addc_u32 s9, s35, 0 491; GFX1032-NEXT: s_getpc_b64 s[4:5] 492; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 493; GFX1032-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 494; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 495; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 496; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 497; GFX1032-NEXT: s_mov_b32 s14, s10 498; GFX1032-NEXT: s_mov_b64 s[10:11], s[6:7] 499; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] 500; GFX1032-NEXT: 
s_mov_b64 s[6:7], s[2:3] 501; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 502; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] 503; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] 504; GFX1032-NEXT: s_mov_b32 s32, 0 505; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 506; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 507; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 508; GFX1032-NEXT: s_mov_b32 s0, exec_lo 509; GFX1032-NEXT: .LBB1_1: ; %ComputeLoop 510; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 511; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 512; GFX1032-NEXT: v_max_f32_e32 v1, v1, v1 513; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 514; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 515; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 516; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 517; GFX1032-NEXT: v_max_f32_e64 v2, s2, s2 518; GFX1032-NEXT: v_max_f32_e32 v1, v1, v2 519; GFX1032-NEXT: s_cbranch_scc1 .LBB1_1 520; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 521; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 522; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 523; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 524; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 525; GFX1032-NEXT: s_cbranch_execz .LBB1_4 526; GFX1032-NEXT: ; %bb.3: 527; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 528; GFX1032-NEXT: v_mov_b32_e32 v0, 0 529; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 530; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 531; GFX1032-NEXT: .LBB1_4: 532; GFX1032-NEXT: s_endpgm 533; 534; GFX1164-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 535; GFX1164: ; %bb.0: 536; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 537; GFX1164-NEXT: s_mov_b32 s12, s8 538; GFX1164-NEXT: s_add_u32 s8, s34, 44 539; GFX1164-NEXT: s_mov_b32 s13, s9 540; GFX1164-NEXT: s_addc_u32 s9, s35, 0 541; GFX1164-NEXT: s_getpc_b64 s[4:5] 542; GFX1164-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 543; GFX1164-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 544; GFX1164-NEXT: v_mov_b32_e32 v31, v0 545; GFX1164-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 546; 
GFX1164-NEXT: s_mov_b32 s14, s10 547; GFX1164-NEXT: s_mov_b64 s[10:11], s[6:7] 548; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] 549; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 550; GFX1164-NEXT: s_mov_b32 s32, 0 551; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 552; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] 553; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 554; GFX1164-NEXT: s_mov_b64 s[0:1], exec 555; GFX1164-NEXT: .LBB1_1: ; %ComputeLoop 556; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 557; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 558; GFX1164-NEXT: s_ctz_i32_b64 s2, s[0:1] 559; GFX1164-NEXT: v_max_f32_e32 v1, v1, v1 560; GFX1164-NEXT: v_readlane_b32 s3, v0, s2 561; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 562; GFX1164-NEXT: v_max_f32_e64 v2, s3, s3 563; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s2 564; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 565; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 566; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 567; GFX1164-NEXT: v_max_f32_e32 v1, v1, v2 568; GFX1164-NEXT: s_cbranch_scc1 .LBB1_1 569; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 570; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 571; GFX1164-NEXT: s_mov_b64 s[0:1], exec 572; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 573; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 574; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 575; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 576; GFX1164-NEXT: s_cbranch_execz .LBB1_4 577; GFX1164-NEXT: ; %bb.3: 578; GFX1164-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 579; GFX1164-NEXT: v_mov_b32_e32 v0, 0 580; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 581; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 582; GFX1164-NEXT: .LBB1_4: 583; GFX1164-NEXT: s_endpgm 584; 585; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 586; GFX1132: ; %bb.0: 587; GFX1132-NEXT: s_mov_b64 
s[34:35], s[4:5] 588; GFX1132-NEXT: v_mov_b32_e32 v31, v0 589; GFX1132-NEXT: s_add_u32 s8, s34, 44 590; GFX1132-NEXT: s_addc_u32 s9, s35, 0 591; GFX1132-NEXT: s_getpc_b64 s[4:5] 592; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 593; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 594; GFX1132-NEXT: s_mov_b32 s12, s13 595; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 596; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] 597; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] 598; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 599; GFX1132-NEXT: s_mov_b32 s13, s14 600; GFX1132-NEXT: s_mov_b32 s14, s15 601; GFX1132-NEXT: s_mov_b32 s32, 0 602; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 603; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] 604; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 605; GFX1132-NEXT: s_mov_b32 s0, exec_lo 606; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop 607; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 608; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 609; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 610; GFX1132-NEXT: v_max_f32_e32 v1, v1, v1 611; GFX1132-NEXT: v_readlane_b32 s2, v0, s1 612; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 613; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 614; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 615; GFX1132-NEXT: s_cmp_lg_u32 s0, 0 616; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 617; GFX1132-NEXT: v_max_f32_e64 v2, s2, s2 618; GFX1132-NEXT: v_max_f32_e32 v1, v1, v2 619; GFX1132-NEXT: s_cbranch_scc1 .LBB1_1 620; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 621; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 622; GFX1132-NEXT: s_mov_b32 s0, exec_lo 623; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 624; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 625; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 626; GFX1132-NEXT: s_cbranch_execz .LBB1_4 627; GFX1132-NEXT: ; %bb.3: 628; GFX1132-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 629; 
GFX1132-NEXT: v_mov_b32_e32 v0, 0 630; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 631; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 632; GFX1132-NEXT: .LBB1_4: 633; GFX1132-NEXT: s_endpgm 634; 635; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 636; GFX7LESS-DPP: ; %bb.0: 637; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 638; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 639; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 640; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 641; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 642; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 643; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 644; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 645; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 646; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 647; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 648; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 649; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 650; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 651; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 652; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 653; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] 654; GFX7LESS-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 655; GFX7LESS-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 656; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 657; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 658; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 659; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 660; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 661; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 662; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 663; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] 664; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] 665; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 666; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 667; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 668; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 669; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 670; 
GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start 671; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 672; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 673; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 674; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, v0, v2 675; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 676; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 677; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 678; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc 679; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 680; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 681; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 682; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v3 683; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[0:1] 684; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB1_1 685; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 686; GFX7LESS-DPP-NEXT: s_endpgm 687; 688; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 689; GFX9-DPP: ; %bb.0: 690; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 691; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 692; GFX9-DPP-NEXT: s_mov_b32 s38, -1 693; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 694; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 695; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 696; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 697; GFX9-DPP-NEXT: s_mov_b32 s12, s8 698; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 699; GFX9-DPP-NEXT: s_mov_b32 s13, s9 700; GFX9-DPP-NEXT: s_addc_u32 s9, s35, 0 701; GFX9-DPP-NEXT: s_getpc_b64 s[4:5] 702; GFX9-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 703; GFX9-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 704; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 705; GFX9-DPP-NEXT: s_mov_b32 s14, s10 706; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 707; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 708; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 709; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 710; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 711; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 712; 
GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 713; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 714; GFX9-DPP-NEXT: s_mov_b32 s32, 0 715; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 716; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 717; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 718; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 719; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 720; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 721; GFX9-DPP-NEXT: v_cndmask_b32_e64 v4, v3, v0, s[0:1] 722; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 723; GFX9-DPP-NEXT: s_nop 1 724; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf 725; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 726; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 727; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 728; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 729; GFX9-DPP-NEXT: s_nop 1 730; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf 731; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 732; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 733; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 734; GFX9-DPP-NEXT: s_nop 1 735; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf 736; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 737; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 738; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 739; GFX9-DPP-NEXT: s_nop 1 740; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf 741; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 742; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 743; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 744; GFX9-DPP-NEXT: s_nop 1 745; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf 746; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 747; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 748; GFX9-DPP-NEXT: s_nop 1 749; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf 750; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 751; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 752; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 
753; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 754; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 755; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 756; GFX9-DPP-NEXT: s_cbranch_execz .LBB1_3 757; GFX9-DPP-NEXT: ; %bb.1: 758; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 759; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 760; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 761; GFX9-DPP-NEXT: v_max_f32_e64 v6, s4, s4 762; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 763; GFX9-DPP-NEXT: global_load_dword v1, v2, s[0:1] 764; GFX9-DPP-NEXT: .LBB1_2: ; %atomicrmw.start 765; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 766; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 767; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 768; GFX9-DPP-NEXT: v_max_f32_e32 v0, v0, v6 769; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 770; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 771; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 772; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 773; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 774; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 775; GFX9-DPP-NEXT: s_cbranch_execnz .LBB1_2 776; GFX9-DPP-NEXT: .LBB1_3: 777; GFX9-DPP-NEXT: s_endpgm 778; 779; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 780; GFX1064-DPP: ; %bb.0: 781; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 782; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 783; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 784; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 785; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 786; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 787; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 788; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 789; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 790; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 791; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 792; GFX1064-DPP-NEXT: s_getpc_b64 s[4:5] 793; GFX1064-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 794; GFX1064-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 795; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 
20, v2 796; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 797; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 798; GFX1064-DPP-NEXT: s_mov_b32 s14, s10 799; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 800; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 801; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 802; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 803; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 804; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 805; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 806; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 807; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 808; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 809; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 810; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1] 811; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 812; GFX1064-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 813; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 814; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v3 815; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 816; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 817; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 818; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 819; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 820; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 821; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 822; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 823; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 824; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 825; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 826; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 827; GFX1064-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 828; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 829; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 830; GFX1064-DPP-NEXT: v_readlane_b32 s2, v3, 32 831; GFX1064-DPP-NEXT: v_readlane_b32 s3, v3, 0 832; GFX1064-DPP-NEXT: v_max_f32_e64 v3, s2, s2 833; 
GFX1064-DPP-NEXT: v_max_f32_e64 v4, s3, s3 834; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 835; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 836; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 837; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 838; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 839; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 840; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, v3 841; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 842; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 843; GFX1064-DPP-NEXT: s_cbranch_execz .LBB1_2 844; GFX1064-DPP-NEXT: ; %bb.1: 845; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 846; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 847; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 848; GFX1064-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 849; GFX1064-DPP-NEXT: .LBB1_2: 850; GFX1064-DPP-NEXT: s_endpgm 851; 852; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 853; GFX1032-DPP: ; %bb.0: 854; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 855; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 856; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 857; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 858; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 859; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 860; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 861; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 862; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 863; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 864; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 865; GFX1032-DPP-NEXT: s_getpc_b64 s[4:5] 866; GFX1032-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 867; GFX1032-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 868; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 869; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 870; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 871; GFX1032-DPP-NEXT: s_mov_b32 s14, s10 872; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 873; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 874; 
GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 875; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 876; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 877; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 878; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 879; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 880; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 881; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 882; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 883; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0 884; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 885; GFX1032-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 886; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 887; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v3 888; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v4, v3 889; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 890; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 891; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 892; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 893; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 894; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 895; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 896; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 897; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 898; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 899; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 900; GFX1032-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 901; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 902; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 903; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 904; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 905; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 906; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 907; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 908; GFX1032-DPP-NEXT: s_cbranch_execz .LBB1_2 909; GFX1032-DPP-NEXT: ; %bb.1: 910; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 911; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 
0 912; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 913; GFX1032-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 914; GFX1032-DPP-NEXT: .LBB1_2: 915; GFX1032-DPP-NEXT: s_endpgm 916; 917; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 918; GFX1164-DPP: ; %bb.0: 919; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 920; GFX1164-DPP-NEXT: s_mov_b32 s12, s8 921; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 922; GFX1164-DPP-NEXT: s_mov_b32 s13, s9 923; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 924; GFX1164-DPP-NEXT: s_getpc_b64 s[4:5] 925; GFX1164-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 926; GFX1164-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 927; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 928; GFX1164-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 929; GFX1164-DPP-NEXT: s_mov_b32 s14, s10 930; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 931; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 932; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 933; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 934; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 935; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 936; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 937; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 938; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1] 939; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 940; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 941; GFX1164-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 942; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 943; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v1 944; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 945; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v2, v1 946; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 947; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 948; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 949; 
GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 950; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 951; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 952; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 953; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 954; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 955; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 956; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 957; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 958; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 959; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 960; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 961; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 962; GFX1164-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 963; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 964; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 965; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 966; GFX1164-DPP-NEXT: v_permlane64_b32 v2, v1 967; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 968; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 969; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 970; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 971; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 972; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 973; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 974; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 975; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 976; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0 977; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 978; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1 979; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 980; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4 981; 
GFX1164-DPP-NEXT: s_cbranch_execz .LBB1_2 982; GFX1164-DPP-NEXT: ; %bb.1: 983; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 984; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 985; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 986; GFX1164-DPP-NEXT: global_atomic_max_f32 v4, v0, s[0:1] 987; GFX1164-DPP-NEXT: .LBB1_2: 988; GFX1164-DPP-NEXT: s_endpgm 989; 990; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: 991; GFX1132-DPP: ; %bb.0: 992; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 993; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 994; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 995; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 996; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] 997; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 998; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 999; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 1000; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 1001; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1002; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1003; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1004; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 1005; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 1006; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 1007; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 1008; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1009; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 1010; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 1011; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 1012; GFX1132-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1013; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1014; GFX1132-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 1015; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1016; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1 1017; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v2, v1 1018; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(NEXT) | instid1(VALU_DEP_1) 1019; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 1020; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 1021; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1022; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1023; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 1024; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1025; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 1026; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1027; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1028; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 1029; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v3, v3 1030; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1031; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1032; GFX1132-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 1033; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1034; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v2, v2 1035; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1036; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 1037; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1038; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v4, exec_lo, 0 1039; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, v1 1040; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 1041; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 1042; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4 1043; GFX1132-DPP-NEXT: s_cbranch_execz .LBB1_2 1044; GFX1132-DPP-NEXT: ; %bb.1: 1045; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 1046; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0 1047; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 1048; GFX1132-DPP-NEXT: global_atomic_max_f32 v4, v0, s[0:1] 1049; GFX1132-DPP-NEXT: .LBB1_2: 
1050; GFX1132-DPP-NEXT: s_endpgm 1051 %divValue = call float @div.float.value() 1052 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1 1053 ret void 1054} 1055 1056define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 { 1057; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1058; GFX7LESS: ; %bb.0: 1059; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 1060; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 1061; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1062; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 1063; GFX7LESS-NEXT: s_cbranch_execz .LBB2_3 1064; GFX7LESS-NEXT: ; %bb.1: 1065; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1066; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 1067; GFX7LESS-NEXT: s_load_dword s2, s[0:1], 0x0 1068; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 1069; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 1070; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 1071; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 1072; GFX7LESS-NEXT: s_mov_b32 s2, -1 1073; GFX7LESS-NEXT: .LBB2_2: ; %atomicrmw.start 1074; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 1075; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 1076; GFX7LESS-NEXT: v_max_f32_e32 v0, 4.0, v0 1077; GFX7LESS-NEXT: s_waitcnt expcnt(0) 1078; GFX7LESS-NEXT: v_mov_b32_e32 v3, v1 1079; GFX7LESS-NEXT: v_mov_b32_e32 v2, v0 1080; GFX7LESS-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 1081; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 1082; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 1083; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1084; GFX7LESS-NEXT: v_mov_b32_e32 v1, v2 1085; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 1086; GFX7LESS-NEXT: s_cbranch_execnz .LBB2_2 1087; GFX7LESS-NEXT: .LBB2_3: 1088; GFX7LESS-NEXT: s_endpgm 1089; 1090; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1091; GFX9: ; %bb.0: 1092; GFX9-NEXT: 
v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1093; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1094; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1095; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 1096; GFX9-NEXT: s_cbranch_execz .LBB2_3 1097; GFX9-NEXT: ; %bb.1: 1098; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1099; GFX9-NEXT: s_mov_b64 s[2:3], 0 1100; GFX9-NEXT: v_mov_b32_e32 v2, 0 1101; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1102; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 1103; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1104; GFX9-NEXT: v_mov_b32_e32 v1, s4 1105; GFX9-NEXT: .LBB2_2: ; %atomicrmw.start 1106; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1107; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 1108; GFX9-NEXT: v_max_f32_e32 v0, 4.0, v0 1109; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 1110; GFX9-NEXT: s_waitcnt vmcnt(0) 1111; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 1112; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 1113; GFX9-NEXT: v_mov_b32_e32 v1, v0 1114; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 1115; GFX9-NEXT: s_cbranch_execnz .LBB2_2 1116; GFX9-NEXT: .LBB2_3: 1117; GFX9-NEXT: s_endpgm 1118; 1119; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1120; GFX1064: ; %bb.0: 1121; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1122; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1123; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1124; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 1125; GFX1064-NEXT: s_cbranch_execz .LBB2_2 1126; GFX1064-NEXT: ; %bb.1: 1127; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1128; GFX1064-NEXT: v_mov_b32_e32 v0, 0 1129; GFX1064-NEXT: v_mov_b32_e32 v1, 4.0 1130; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 1131; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 1132; GFX1064-NEXT: .LBB2_2: 1133; GFX1064-NEXT: s_endpgm 1134; 1135; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1136; GFX1032: ; %bb.0: 1137; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1138; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1139; 
GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 1140; GFX1032-NEXT: s_cbranch_execz .LBB2_2 1141; GFX1032-NEXT: ; %bb.1: 1142; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1143; GFX1032-NEXT: v_mov_b32_e32 v0, 0 1144; GFX1032-NEXT: v_mov_b32_e32 v1, 4.0 1145; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 1146; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 1147; GFX1032-NEXT: .LBB2_2: 1148; GFX1032-NEXT: s_endpgm 1149; 1150; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1151; GFX1164: ; %bb.0: 1152; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1153; GFX1164-NEXT: s_mov_b64 s[0:1], exec 1154; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1155; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1156; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 1157; GFX1164-NEXT: s_cbranch_execz .LBB2_2 1158; GFX1164-NEXT: ; %bb.1: 1159; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1160; GFX1164-NEXT: v_mov_b32_e32 v0, 0 1161; GFX1164-NEXT: v_mov_b32_e32 v1, 4.0 1162; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 1163; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1164; GFX1164-NEXT: .LBB2_2: 1165; GFX1164-NEXT: s_endpgm 1166; 1167; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1168; GFX1132: ; %bb.0: 1169; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1170; GFX1132-NEXT: s_mov_b32 s0, exec_lo 1171; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 1172; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 1173; GFX1132-NEXT: s_cbranch_execz .LBB2_2 1174; GFX1132-NEXT: ; %bb.1: 1175; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1176; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 1177; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 1178; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1179; GFX1132-NEXT: .LBB2_2: 1180; GFX1132-NEXT: s_endpgm 1181; 1182; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1183; GFX7LESS-DPP: ; %bb.0: 1184; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, 
exec_lo, 0 1185; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 1186; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1187; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 1188; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB2_3 1189; GFX7LESS-DPP-NEXT: ; %bb.1: 1190; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1191; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 1192; GFX7LESS-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 1193; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 1194; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 1195; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 1196; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s2 1197; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 1198; GFX7LESS-DPP-NEXT: .LBB2_2: ; %atomicrmw.start 1199; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 1200; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 1201; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 1202; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 1203; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 1204; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v0 1205; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 1206; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 1207; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 1208; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1209; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v2 1210; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 1211; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB2_2 1212; GFX7LESS-DPP-NEXT: .LBB2_3: 1213; GFX7LESS-DPP-NEXT: s_endpgm 1214; 1215; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1216; GFX9-DPP: ; %bb.0: 1217; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1218; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1219; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1220; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 1221; GFX9-DPP-NEXT: s_cbranch_execz .LBB2_3 1222; GFX9-DPP-NEXT: ; %bb.1: 1223; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1224; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 1225; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 1226; 
GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 1227; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 1228; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 1229; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 1230; GFX9-DPP-NEXT: .LBB2_2: ; %atomicrmw.start 1231; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 1232; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 1233; GFX9-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 1234; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 1235; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 1236; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 1237; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 1238; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 1239; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 1240; GFX9-DPP-NEXT: s_cbranch_execnz .LBB2_2 1241; GFX9-DPP-NEXT: .LBB2_3: 1242; GFX9-DPP-NEXT: s_endpgm 1243; 1244; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1245; GFX1064-DPP: ; %bb.0: 1246; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1247; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1248; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1249; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 1250; GFX1064-DPP-NEXT: s_cbranch_execz .LBB2_2 1251; GFX1064-DPP-NEXT: ; %bb.1: 1252; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1253; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 1254; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 4.0 1255; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 1256; GFX1064-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 1257; GFX1064-DPP-NEXT: .LBB2_2: 1258; GFX1064-DPP-NEXT: s_endpgm 1259; 1260; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1261; GFX1032-DPP: ; %bb.0: 1262; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1263; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1264; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 1265; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_2 1266; GFX1032-DPP-NEXT: ; %bb.1: 1267; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1268; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 
1269; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 4.0 1270; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 1271; GFX1032-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 1272; GFX1032-DPP-NEXT: .LBB2_2: 1273; GFX1032-DPP-NEXT: s_endpgm 1274; 1275; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1276; GFX1164-DPP: ; %bb.0: 1277; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1278; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 1279; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1280; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1281; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 1282; GFX1164-DPP-NEXT: s_cbranch_execz .LBB2_2 1283; GFX1164-DPP-NEXT: ; %bb.1: 1284; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1285; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 1286; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 4.0 1287; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 1288; GFX1164-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1289; GFX1164-DPP-NEXT: .LBB2_2: 1290; GFX1164-DPP-NEXT: s_endpgm 1291; 1292; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe: 1293; GFX1132-DPP: ; %bb.0: 1294; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1295; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 1296; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 1297; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 1298; GFX1132-DPP-NEXT: s_cbranch_execz .LBB2_2 1299; GFX1132-DPP-NEXT: ; %bb.1: 1300; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1301; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 1302; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 1303; GFX1132-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1304; GFX1132-DPP-NEXT: .LBB2_2: 1305; GFX1132-DPP-NEXT: s_endpgm 1306 %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1 1307 ret void 1308} 1309 1310 1311define amdgpu_kernel void 
@global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 { 1312; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1313; GFX7LESS: ; %bb.0: 1314; GFX7LESS-NEXT: s_mov_b32 s32, 0 1315; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1316; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1317; GFX7LESS-NEXT: s_mov_b32 s38, -1 1318; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 1319; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 1320; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 1321; GFX7LESS-NEXT: s_mov_b32 s14, s10 1322; GFX7LESS-NEXT: s_mov_b32 s13, s9 1323; GFX7LESS-NEXT: s_mov_b32 s12, s8 1324; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[6:7] 1325; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5] 1326; GFX7LESS-NEXT: s_add_u32 s8, s34, 44 1327; GFX7LESS-NEXT: s_addc_u32 s9, s35, 0 1328; GFX7LESS-NEXT: s_getpc_b64 s[4:5] 1329; GFX7LESS-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1330; GFX7LESS-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1331; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1332; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1333; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1334; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 1335; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 1336; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] 1337; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 1338; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] 1339; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] 1340; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 1341; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 1342; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 1343; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0x7fc00000 1344; GFX7LESS-NEXT: .LBB3_1: ; %ComputeLoop 1345; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 1346; GFX7LESS-NEXT: s_ff1_i32_b64 s2, s[0:1] 1347; GFX7LESS-NEXT: v_mul_f32_e32 v1, 1.0, v2 1348; GFX7LESS-NEXT: v_readlane_b32 s4, v0, s2 1349; GFX7LESS-NEXT: s_lshl_b64 s[2:3], 1, s2 1350; GFX7LESS-NEXT: v_mul_f32_e64 v2, 1.0, s4 1351; GFX7LESS-NEXT: s_andn2_b64 
s[0:1], s[0:1], s[2:3] 1352; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 1353; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 1354; GFX7LESS-NEXT: v_max_f32_e32 v2, v1, v2 1355; GFX7LESS-NEXT: s_cbranch_vccnz .LBB3_1 1356; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 1357; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 1358; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 1359; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1360; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 1361; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1362; GFX7LESS-NEXT: s_cbranch_execz .LBB3_5 1363; GFX7LESS-NEXT: ; %bb.3: 1364; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x9 1365; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 1366; GFX7LESS-NEXT: s_mov_b32 s2, -1 1367; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 1368; GFX7LESS-NEXT: buffer_load_dword v1, off, s[0:3], 0 1369; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 1370; GFX7LESS-NEXT: v_mul_f32_e32 v2, 1.0, v2 1371; GFX7LESS-NEXT: .LBB3_4: ; %atomicrmw.start 1372; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 1373; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 1374; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 1375; GFX7LESS-NEXT: v_max_f32_e32 v0, v0, v2 1376; GFX7LESS-NEXT: s_waitcnt expcnt(0) 1377; GFX7LESS-NEXT: v_mov_b32_e32 v4, v1 1378; GFX7LESS-NEXT: v_mov_b32_e32 v3, v0 1379; GFX7LESS-NEXT: buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc 1380; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 1381; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 1382; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1383; GFX7LESS-NEXT: v_mov_b32_e32 v1, v3 1384; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 1385; GFX7LESS-NEXT: s_cbranch_execnz .LBB3_4 1386; GFX7LESS-NEXT: .LBB3_5: 1387; GFX7LESS-NEXT: s_endpgm 1388; 1389; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1390; GFX9: ; %bb.0: 1391; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1392; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1393; GFX9-NEXT: s_mov_b32 s38, -1 1394; GFX9-NEXT: s_mov_b32 s39, 0xe00000 
1395; GFX9-NEXT: s_add_u32 s36, s36, s11 1396; GFX9-NEXT: s_addc_u32 s37, s37, 0 1397; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] 1398; GFX9-NEXT: s_mov_b32 s12, s8 1399; GFX9-NEXT: s_add_u32 s8, s34, 44 1400; GFX9-NEXT: s_mov_b32 s13, s9 1401; GFX9-NEXT: s_addc_u32 s9, s35, 0 1402; GFX9-NEXT: s_getpc_b64 s[4:5] 1403; GFX9-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1404; GFX9-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1405; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1406; GFX9-NEXT: s_mov_b32 s14, s10 1407; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] 1408; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1409; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1410; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] 1411; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 1412; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1413; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 1414; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1415; GFX9-NEXT: s_mov_b32 s32, 0 1416; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1417; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 1418; GFX9-NEXT: s_mov_b64 s[0:1], exec 1419; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 1420; GFX9-NEXT: .LBB3_1: ; %ComputeLoop 1421; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1422; GFX9-NEXT: s_ff1_i32_b64 s2, s[0:1] 1423; GFX9-NEXT: v_readlane_b32 s4, v0, s2 1424; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s2 1425; GFX9-NEXT: v_max_f32_e32 v1, v2, v2 1426; GFX9-NEXT: v_max_f32_e64 v2, s4, s4 1427; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 1428; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 1429; GFX9-NEXT: v_max_f32_e32 v2, v1, v2 1430; GFX9-NEXT: s_cbranch_scc1 .LBB3_1 1431; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 1432; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1433; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1434; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1435; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 1436; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1437; GFX9-NEXT: s_cbranch_execz .LBB3_5 1438; GFX9-NEXT: ; %bb.3: 1439; GFX9-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1440; GFX9-NEXT: 
v_mov_b32_e32 v3, 0 1441; GFX9-NEXT: s_mov_b64 s[2:3], 0 1442; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 1443; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1444; GFX9-NEXT: global_load_dword v1, v3, s[0:1] 1445; GFX9-NEXT: .LBB3_4: ; %atomicrmw.start 1446; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1447; GFX9-NEXT: s_waitcnt vmcnt(0) 1448; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 1449; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 1450; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc 1451; GFX9-NEXT: s_waitcnt vmcnt(0) 1452; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 1453; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 1454; GFX9-NEXT: v_mov_b32_e32 v1, v0 1455; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 1456; GFX9-NEXT: s_cbranch_execnz .LBB3_4 1457; GFX9-NEXT: .LBB3_5: 1458; GFX9-NEXT: s_endpgm 1459; 1460; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1461; GFX1064: ; %bb.0: 1462; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1463; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1464; GFX1064-NEXT: s_mov_b32 s38, -1 1465; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 1466; GFX1064-NEXT: s_add_u32 s36, s36, s11 1467; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 1468; GFX1064-NEXT: s_addc_u32 s37, s37, 0 1469; GFX1064-NEXT: s_mov_b32 s12, s8 1470; GFX1064-NEXT: s_add_u32 s8, s34, 44 1471; GFX1064-NEXT: s_mov_b32 s13, s9 1472; GFX1064-NEXT: s_addc_u32 s9, s35, 0 1473; GFX1064-NEXT: s_getpc_b64 s[4:5] 1474; GFX1064-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1475; GFX1064-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1476; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1477; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1478; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1479; GFX1064-NEXT: s_mov_b32 s14, s10 1480; GFX1064-NEXT: s_mov_b64 s[10:11], s[6:7] 1481; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] 1482; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 1483; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 1484; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] 1485; 
GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] 1486; GFX1064-NEXT: s_mov_b32 s32, 0 1487; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 1488; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 1489; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1490; GFX1064-NEXT: s_mov_b64 s[0:1], exec 1491; GFX1064-NEXT: .LBB3_1: ; %ComputeLoop 1492; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 1493; GFX1064-NEXT: s_ff1_i32_b64 s2, s[0:1] 1494; GFX1064-NEXT: v_max_f32_e32 v1, v1, v1 1495; GFX1064-NEXT: v_readlane_b32 s3, v0, s2 1496; GFX1064-NEXT: v_max_f32_e64 v2, s3, s3 1497; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s2 1498; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 1499; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 1500; GFX1064-NEXT: v_max_f32_e32 v1, v1, v2 1501; GFX1064-NEXT: s_cbranch_scc1 .LBB3_1 1502; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 1503; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1504; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1505; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1506; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 1507; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1508; GFX1064-NEXT: s_cbranch_execz .LBB3_4 1509; GFX1064-NEXT: ; %bb.3: 1510; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1511; GFX1064-NEXT: v_mov_b32_e32 v0, 0 1512; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 1513; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 1514; GFX1064-NEXT: .LBB3_4: 1515; GFX1064-NEXT: s_endpgm 1516; 1517; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1518; GFX1032: ; %bb.0: 1519; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1520; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1521; GFX1032-NEXT: s_mov_b32 s38, -1 1522; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 1523; GFX1032-NEXT: s_add_u32 s36, s36, s11 1524; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 1525; GFX1032-NEXT: s_addc_u32 s37, s37, 0 1526; GFX1032-NEXT: s_mov_b32 s12, s8 1527; GFX1032-NEXT: s_add_u32 s8, s34, 44 1528; GFX1032-NEXT: s_mov_b32 s13, s9 1529; GFX1032-NEXT: s_addc_u32 s9, s35, 
0 1530; GFX1032-NEXT: s_getpc_b64 s[4:5] 1531; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1532; GFX1032-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1533; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1534; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1535; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1536; GFX1032-NEXT: s_mov_b32 s14, s10 1537; GFX1032-NEXT: s_mov_b64 s[10:11], s[6:7] 1538; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] 1539; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] 1540; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 1541; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] 1542; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] 1543; GFX1032-NEXT: s_mov_b32 s32, 0 1544; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 1545; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 1546; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1547; GFX1032-NEXT: s_mov_b32 s0, exec_lo 1548; GFX1032-NEXT: .LBB3_1: ; %ComputeLoop 1549; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 1550; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 1551; GFX1032-NEXT: v_max_f32_e32 v1, v1, v1 1552; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 1553; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 1554; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 1555; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 1556; GFX1032-NEXT: v_max_f32_e64 v2, s2, s2 1557; GFX1032-NEXT: v_max_f32_e32 v1, v1, v2 1558; GFX1032-NEXT: s_cbranch_scc1 .LBB3_1 1559; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 1560; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1561; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1562; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 1563; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 1564; GFX1032-NEXT: s_cbranch_execz .LBB3_4 1565; GFX1032-NEXT: ; %bb.3: 1566; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1567; GFX1032-NEXT: v_mov_b32_e32 v0, 0 1568; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 1569; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 1570; GFX1032-NEXT: .LBB3_4: 1571; GFX1032-NEXT: s_endpgm 1572; 1573; GFX1164-LABEL: 
global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1574; GFX1164: ; %bb.0: 1575; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 1576; GFX1164-NEXT: s_mov_b32 s12, s8 1577; GFX1164-NEXT: s_add_u32 s8, s34, 44 1578; GFX1164-NEXT: s_mov_b32 s13, s9 1579; GFX1164-NEXT: s_addc_u32 s9, s35, 0 1580; GFX1164-NEXT: s_getpc_b64 s[4:5] 1581; GFX1164-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1582; GFX1164-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1583; GFX1164-NEXT: v_mov_b32_e32 v31, v0 1584; GFX1164-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 1585; GFX1164-NEXT: s_mov_b32 s14, s10 1586; GFX1164-NEXT: s_mov_b64 s[10:11], s[6:7] 1587; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] 1588; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 1589; GFX1164-NEXT: s_mov_b32 s32, 0 1590; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 1591; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] 1592; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1593; GFX1164-NEXT: s_mov_b64 s[0:1], exec 1594; GFX1164-NEXT: .LBB3_1: ; %ComputeLoop 1595; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 1596; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1597; GFX1164-NEXT: s_ctz_i32_b64 s2, s[0:1] 1598; GFX1164-NEXT: v_max_f32_e32 v1, v1, v1 1599; GFX1164-NEXT: v_readlane_b32 s3, v0, s2 1600; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 1601; GFX1164-NEXT: v_max_f32_e64 v2, s3, s3 1602; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s2 1603; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 1604; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1605; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 1606; GFX1164-NEXT: v_max_f32_e32 v1, v1, v2 1607; GFX1164-NEXT: s_cbranch_scc1 .LBB3_1 1608; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 1609; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1610; GFX1164-NEXT: s_mov_b64 s[0:1], exec 1611; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1612; 
GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 1613; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 1614; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1615; GFX1164-NEXT: s_cbranch_execz .LBB3_4 1616; GFX1164-NEXT: ; %bb.3: 1617; GFX1164-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 1618; GFX1164-NEXT: v_mov_b32_e32 v0, 0 1619; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 1620; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1621; GFX1164-NEXT: .LBB3_4: 1622; GFX1164-NEXT: s_endpgm 1623; 1624; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1625; GFX1132: ; %bb.0: 1626; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] 1627; GFX1132-NEXT: v_mov_b32_e32 v31, v0 1628; GFX1132-NEXT: s_add_u32 s8, s34, 44 1629; GFX1132-NEXT: s_addc_u32 s9, s35, 0 1630; GFX1132-NEXT: s_getpc_b64 s[4:5] 1631; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1632; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1633; GFX1132-NEXT: s_mov_b32 s12, s13 1634; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 1635; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] 1636; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] 1637; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 1638; GFX1132-NEXT: s_mov_b32 s13, s14 1639; GFX1132-NEXT: s_mov_b32 s14, s15 1640; GFX1132-NEXT: s_mov_b32 s32, 0 1641; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 1642; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] 1643; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1644; GFX1132-NEXT: s_mov_b32 s0, exec_lo 1645; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop 1646; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 1647; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1648; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 1649; GFX1132-NEXT: v_max_f32_e32 v1, v1, v1 1650; GFX1132-NEXT: v_readlane_b32 s2, v0, s1 1651; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 1652; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1653; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 1654; GFX1132-NEXT: 
s_cmp_lg_u32 s0, 0 1655; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1656; GFX1132-NEXT: v_max_f32_e64 v2, s2, s2 1657; GFX1132-NEXT: v_max_f32_e32 v1, v1, v2 1658; GFX1132-NEXT: s_cbranch_scc1 .LBB3_1 1659; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 1660; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1661; GFX1132-NEXT: s_mov_b32 s0, exec_lo 1662; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 1663; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 1664; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 1665; GFX1132-NEXT: s_cbranch_execz .LBB3_4 1666; GFX1132-NEXT: ; %bb.3: 1667; GFX1132-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 1668; GFX1132-NEXT: v_mov_b32_e32 v0, 0 1669; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 1670; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 1671; GFX1132-NEXT: .LBB3_4: 1672; GFX1132-NEXT: s_endpgm 1673; 1674; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1675; GFX7LESS-DPP: ; %bb.0: 1676; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 1677; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 1678; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 1679; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 1680; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 1681; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 1682; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 1683; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 1684; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 1685; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 1686; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1687; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 1688; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 1689; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 1690; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 1691; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 1692; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] 1693; GFX7LESS-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1694; GFX7LESS-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1695; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], 
s[4:5], 0x0 1696; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1697; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1698; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 1699; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 1700; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1701; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1702; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] 1703; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] 1704; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 1705; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1706; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 1707; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 1708; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 1709; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start 1710; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 1711; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 1712; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 1713; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, v0, v2 1714; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 1715; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 1716; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 1717; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc 1718; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 1719; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 1720; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1721; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v3 1722; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[0:1] 1723; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB3_1 1724; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 1725; GFX7LESS-DPP-NEXT: s_endpgm 1726; 1727; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1728; GFX9-DPP: ; %bb.0: 1729; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1730; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1731; GFX9-DPP-NEXT: s_mov_b32 s38, -1 1732; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 1733; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 1734; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 1735; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 
1736; GFX9-DPP-NEXT: s_mov_b32 s12, s8 1737; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 1738; GFX9-DPP-NEXT: s_mov_b32 s13, s9 1739; GFX9-DPP-NEXT: s_addc_u32 s9, s35, 0 1740; GFX9-DPP-NEXT: s_getpc_b64 s[4:5] 1741; GFX9-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1742; GFX9-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1743; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1744; GFX9-DPP-NEXT: s_mov_b32 s14, s10 1745; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1746; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1747; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1748; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1749; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1750; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 1751; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 1752; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 1753; GFX9-DPP-NEXT: s_mov_b32 s32, 0 1754; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 1755; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1756; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 1757; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 1758; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 1759; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1760; GFX9-DPP-NEXT: v_cndmask_b32_e64 v4, v3, v0, s[0:1] 1761; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1762; GFX9-DPP-NEXT: s_nop 1 1763; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf 1764; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 1765; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 1766; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 1767; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1768; GFX9-DPP-NEXT: s_nop 1 1769; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf 1770; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 1771; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 1772; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1773; GFX9-DPP-NEXT: s_nop 1 1774; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf 1775; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 1776; 
GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 1777; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1778; GFX9-DPP-NEXT: s_nop 1 1779; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf 1780; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 1781; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 1782; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1783; GFX9-DPP-NEXT: s_nop 1 1784; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf 1785; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 1786; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 1787; GFX9-DPP-NEXT: s_nop 1 1788; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf 1789; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 1790; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 1791; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 1792; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 1793; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1794; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 1795; GFX9-DPP-NEXT: s_cbranch_execz .LBB3_3 1796; GFX9-DPP-NEXT: ; %bb.1: 1797; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1798; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 1799; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 1800; GFX9-DPP-NEXT: v_max_f32_e64 v6, s4, s4 1801; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 1802; GFX9-DPP-NEXT: global_load_dword v1, v2, s[0:1] 1803; GFX9-DPP-NEXT: .LBB3_2: ; %atomicrmw.start 1804; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 1805; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 1806; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 1807; GFX9-DPP-NEXT: v_max_f32_e32 v0, v0, v6 1808; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 1809; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 1810; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 1811; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 1812; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 1813; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 1814; GFX9-DPP-NEXT: s_cbranch_execnz .LBB3_2 1815; GFX9-DPP-NEXT: .LBB3_3: 1816; GFX9-DPP-NEXT: s_endpgm 1817; 1818; GFX1064-DPP-LABEL: 
global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1819; GFX1064-DPP: ; %bb.0: 1820; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1821; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1822; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 1823; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 1824; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 1825; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 1826; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 1827; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 1828; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 1829; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 1830; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 1831; GFX1064-DPP-NEXT: s_getpc_b64 s[4:5] 1832; GFX1064-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1833; GFX1064-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1834; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1835; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1836; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1837; GFX1064-DPP-NEXT: s_mov_b32 s14, s10 1838; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1839; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1840; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1841; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 1842; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 1843; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 1844; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 1845; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 1846; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1847; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 1848; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1849; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1] 1850; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1851; GFX1064-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 1852; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 1853; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v3 1854; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 1855; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf 
bank_mask:0xf 1856; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1857; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1858; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1859; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 1860; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1861; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1862; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1863; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 1864; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1865; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1866; GFX1064-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 1867; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 1868; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1869; GFX1064-DPP-NEXT: v_readlane_b32 s2, v3, 32 1870; GFX1064-DPP-NEXT: v_readlane_b32 s3, v3, 0 1871; GFX1064-DPP-NEXT: v_max_f32_e64 v3, s2, s2 1872; GFX1064-DPP-NEXT: v_max_f32_e64 v4, s3, s3 1873; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 1874; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 1875; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 1876; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 1877; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 1878; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 1879; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, v3 1880; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1881; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 1882; GFX1064-DPP-NEXT: s_cbranch_execz .LBB3_2 1883; GFX1064-DPP-NEXT: ; %bb.1: 1884; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1885; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 1886; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 1887; GFX1064-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 1888; GFX1064-DPP-NEXT: .LBB3_2: 1889; GFX1064-DPP-NEXT: s_endpgm 1890; 1891; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1892; GFX1032-DPP: ; %bb.0: 1893; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1894; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1895; 
GFX1032-DPP-NEXT: s_mov_b32 s38, -1 1896; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 1897; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 1898; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 1899; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 1900; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 1901; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 1902; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 1903; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 1904; GFX1032-DPP-NEXT: s_getpc_b64 s[4:5] 1905; GFX1032-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1906; GFX1032-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1907; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 1908; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 1909; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 1910; GFX1032-DPP-NEXT: s_mov_b32 s14, s10 1911; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1912; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1913; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1914; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 1915; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 1916; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 1917; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 1918; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 1919; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1920; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 1921; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1922; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0 1923; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1924; GFX1032-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 1925; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 1926; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v3 1927; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v4, v3 1928; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 1929; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1930; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1931; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1932; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 
row_mask:0xf bank_mask:0xf 1933; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1934; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 1935; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1936; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 1937; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 1938; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1939; GFX1032-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 1940; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 1941; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 1942; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 1943; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 1944; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 1945; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 1946; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 1947; GFX1032-DPP-NEXT: s_cbranch_execz .LBB3_2 1948; GFX1032-DPP-NEXT: ; %bb.1: 1949; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 1950; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 1951; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 1952; GFX1032-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 1953; GFX1032-DPP-NEXT: .LBB3_2: 1954; GFX1032-DPP-NEXT: s_endpgm 1955; 1956; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 1957; GFX1164-DPP: ; %bb.0: 1958; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 1959; GFX1164-DPP-NEXT: s_mov_b32 s12, s8 1960; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 1961; GFX1164-DPP-NEXT: s_mov_b32 s13, s9 1962; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 1963; GFX1164-DPP-NEXT: s_getpc_b64 s[4:5] 1964; GFX1164-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 1965; GFX1164-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 1966; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 1967; GFX1164-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 1968; GFX1164-DPP-NEXT: s_mov_b32 s14, s10 1969; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 1970; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 1971; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 1972; GFX1164-DPP-NEXT: s_mov_b32 
s32, 0 1973; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 1974; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 1975; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 1976; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 1977; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1] 1978; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1979; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1980; GFX1164-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 1981; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 1982; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v1 1983; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1984; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v2, v1 1985; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 1986; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1987; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 1988; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1989; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1990; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1991; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 1992; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 1993; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 1994; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1995; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 1996; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 1997; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1998; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 1999; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2000; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2001; GFX1164-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 2002; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 2003; 
GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2004; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2005; GFX1164-DPP-NEXT: v_permlane64_b32 v2, v1 2006; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 2007; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 2008; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 2009; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2010; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 2011; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 2012; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2013; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 2014; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 2015; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0 2016; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2017; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1 2018; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 2019; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4 2020; GFX1164-DPP-NEXT: s_cbranch_execz .LBB3_2 2021; GFX1164-DPP-NEXT: ; %bb.1: 2022; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 2023; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 2024; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 2025; GFX1164-DPP-NEXT: global_atomic_max_f32 v4, v0, s[0:1] 2026; GFX1164-DPP-NEXT: .LBB3_2: 2027; GFX1164-DPP-NEXT: s_endpgm 2028; 2029; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: 2030; GFX1132-DPP: ; %bb.0: 2031; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 2032; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 2033; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 2034; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 2035; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] 2036; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2037; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2038; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 2039; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 2040; 
GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 2041; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 2042; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 2043; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 2044; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 2045; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 2046; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 2047; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 2048; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 2049; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2050; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 2051; GFX1132-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 2052; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 2053; GFX1132-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 2054; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2055; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1 2056; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v2, v1 2057; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2058; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 2059; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 2060; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2061; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2062; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 2063; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2064; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 2065; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2066; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2067; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 2068; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v3, v3 2069; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(NEXT) | instid1(VALU_DEP_1) 2070; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2071; GFX1132-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 2072; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2073; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v2, v2 2074; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 2075; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 2076; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2077; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v4, exec_lo, 0 2078; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, v1 2079; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 2080; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 2081; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4 2082; GFX1132-DPP-NEXT: s_cbranch_execz .LBB3_2 2083; GFX1132-DPP-NEXT: ; %bb.1: 2084; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 2085; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0 2086; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 2087; GFX1132-DPP-NEXT: global_atomic_max_f32 v4, v0, s[0:1] 2088; GFX1132-DPP-NEXT: .LBB3_2: 2089; GFX1132-DPP-NEXT: s_endpgm 2090 %divValue = call float @div.float.value() 2091 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1 2092 ret void 2093} 2094 2095 2096define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 { 2097; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2098; GFX7LESS: ; %bb.0: 2099; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 2100; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 2101; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2102; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 2103; GFX7LESS-NEXT: s_cbranch_execz .LBB4_3 2104; GFX7LESS-NEXT: ; %bb.1: 2105; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2106; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 2107; GFX7LESS-NEXT: s_load_dword s2, s[0:1], 0x0 2108; GFX7LESS-NEXT: 
s_mov_b64 s[4:5], 0 2109; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 2110; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 2111; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 2112; GFX7LESS-NEXT: s_mov_b32 s2, -1 2113; GFX7LESS-NEXT: .LBB4_2: ; %atomicrmw.start 2114; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 2115; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 2116; GFX7LESS-NEXT: v_max_f32_e32 v0, 4.0, v0 2117; GFX7LESS-NEXT: s_waitcnt expcnt(0) 2118; GFX7LESS-NEXT: v_mov_b32_e32 v3, v1 2119; GFX7LESS-NEXT: v_mov_b32_e32 v2, v0 2120; GFX7LESS-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 2121; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 2122; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 2123; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2124; GFX7LESS-NEXT: v_mov_b32_e32 v1, v2 2125; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 2126; GFX7LESS-NEXT: s_cbranch_execnz .LBB4_2 2127; GFX7LESS-NEXT: .LBB4_3: 2128; GFX7LESS-NEXT: s_endpgm 2129; 2130; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2131; GFX9: ; %bb.0: 2132; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2133; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2134; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2135; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 2136; GFX9-NEXT: s_cbranch_execz .LBB4_3 2137; GFX9-NEXT: ; %bb.1: 2138; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2139; GFX9-NEXT: s_mov_b64 s[2:3], 0 2140; GFX9-NEXT: v_mov_b32_e32 v2, 0 2141; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2142; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 2143; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2144; GFX9-NEXT: v_mov_b32_e32 v1, s4 2145; GFX9-NEXT: .LBB4_2: ; %atomicrmw.start 2146; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2147; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 2148; GFX9-NEXT: v_max_f32_e32 v0, 4.0, v0 2149; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 2150; GFX9-NEXT: s_waitcnt vmcnt(0) 2151; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 2152; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 2153; GFX9-NEXT: v_mov_b32_e32 v1, v0 2154; 
GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 2155; GFX9-NEXT: s_cbranch_execnz .LBB4_2 2156; GFX9-NEXT: .LBB4_3: 2157; GFX9-NEXT: s_endpgm 2158; 2159; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2160; GFX1064: ; %bb.0: 2161; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2162; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2163; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2164; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 2165; GFX1064-NEXT: s_cbranch_execz .LBB4_2 2166; GFX1064-NEXT: ; %bb.1: 2167; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2168; GFX1064-NEXT: v_mov_b32_e32 v0, 0 2169; GFX1064-NEXT: v_mov_b32_e32 v1, 4.0 2170; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 2171; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 2172; GFX1064-NEXT: .LBB4_2: 2173; GFX1064-NEXT: s_endpgm 2174; 2175; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2176; GFX1032: ; %bb.0: 2177; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2178; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 2179; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 2180; GFX1032-NEXT: s_cbranch_execz .LBB4_2 2181; GFX1032-NEXT: ; %bb.1: 2182; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2183; GFX1032-NEXT: v_mov_b32_e32 v0, 0 2184; GFX1032-NEXT: v_mov_b32_e32 v1, 4.0 2185; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 2186; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 2187; GFX1032-NEXT: .LBB4_2: 2188; GFX1032-NEXT: s_endpgm 2189; 2190; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2191; GFX1164: ; %bb.0: 2192; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2193; GFX1164-NEXT: s_mov_b64 s[0:1], exec 2194; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2195; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2196; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 2197; GFX1164-NEXT: s_cbranch_execz .LBB4_2 2198; GFX1164-NEXT: ; %bb.1: 2199; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2200; 
GFX1164-NEXT: v_mov_b32_e32 v0, 0 2201; GFX1164-NEXT: v_mov_b32_e32 v1, 4.0 2202; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 2203; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2204; GFX1164-NEXT: .LBB4_2: 2205; GFX1164-NEXT: s_endpgm 2206; 2207; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2208; GFX1132: ; %bb.0: 2209; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2210; GFX1132-NEXT: s_mov_b32 s0, exec_lo 2211; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 2212; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 2213; GFX1132-NEXT: s_cbranch_execz .LBB4_2 2214; GFX1132-NEXT: ; %bb.1: 2215; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2216; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 2217; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 2218; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2219; GFX1132-NEXT: .LBB4_2: 2220; GFX1132-NEXT: s_endpgm 2221; 2222; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2223; GFX7LESS-DPP: ; %bb.0: 2224; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 2225; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 2226; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2227; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 2228; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB4_3 2229; GFX7LESS-DPP-NEXT: ; %bb.1: 2230; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2231; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 2232; GFX7LESS-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 2233; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 2234; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 2235; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 2236; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s2 2237; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 2238; GFX7LESS-DPP-NEXT: .LBB4_2: ; %atomicrmw.start 2239; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 2240; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 2241; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 2242; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 2243; GFX7LESS-DPP-NEXT: 
v_mov_b32_e32 v3, v1 2244; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v0 2245; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 2246; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 2247; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 2248; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2249; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v2 2250; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 2251; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB4_2 2252; GFX7LESS-DPP-NEXT: .LBB4_3: 2253; GFX7LESS-DPP-NEXT: s_endpgm 2254; 2255; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2256; GFX9-DPP: ; %bb.0: 2257; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2258; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2259; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2260; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 2261; GFX9-DPP-NEXT: s_cbranch_execz .LBB4_3 2262; GFX9-DPP-NEXT: ; %bb.1: 2263; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2264; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 2265; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 2266; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 2267; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 2268; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 2269; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 2270; GFX9-DPP-NEXT: .LBB4_2: ; %atomicrmw.start 2271; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 2272; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 2273; GFX9-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 2274; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 2275; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 2276; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 2277; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 2278; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 2279; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 2280; GFX9-DPP-NEXT: s_cbranch_execnz .LBB4_2 2281; GFX9-DPP-NEXT: .LBB4_3: 2282; GFX9-DPP-NEXT: s_endpgm 2283; 2284; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2285; GFX1064-DPP: ; %bb.0: 2286; GFX1064-DPP-NEXT: 
v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2287; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2288; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2289; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 2290; GFX1064-DPP-NEXT: s_cbranch_execz .LBB4_2 2291; GFX1064-DPP-NEXT: ; %bb.1: 2292; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2293; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 2294; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 4.0 2295; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 2296; GFX1064-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 2297; GFX1064-DPP-NEXT: .LBB4_2: 2298; GFX1064-DPP-NEXT: s_endpgm 2299; 2300; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2301; GFX1032-DPP: ; %bb.0: 2302; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2303; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 2304; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 2305; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_2 2306; GFX1032-DPP-NEXT: ; %bb.1: 2307; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2308; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 2309; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 4.0 2310; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 2311; GFX1032-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 2312; GFX1032-DPP-NEXT: .LBB4_2: 2313; GFX1032-DPP-NEXT: s_endpgm 2314; 2315; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2316; GFX1164-DPP: ; %bb.0: 2317; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2318; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 2319; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2320; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2321; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 2322; GFX1164-DPP-NEXT: s_cbranch_execz .LBB4_2 2323; GFX1164-DPP-NEXT: ; %bb.1: 2324; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2325; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 2326; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 4.0 2327; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 2328; 
GFX1164-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2329; GFX1164-DPP-NEXT: .LBB4_2: 2330; GFX1164-DPP-NEXT: s_endpgm 2331; 2332; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_default_scope_unsafe: 2333; GFX1132-DPP: ; %bb.0: 2334; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2335; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 2336; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 2337; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 2338; GFX1132-DPP-NEXT: s_cbranch_execz .LBB4_2 2339; GFX1132-DPP-NEXT: ; %bb.1: 2340; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2341; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 2342; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 2343; GFX1132-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2344; GFX1132-DPP-NEXT: .LBB4_2: 2345; GFX1132-DPP-NEXT: s_endpgm 2346 %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1 2347 ret void 2348} 2349 2350define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 { 2351; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2352; GFX7LESS: ; %bb.0: 2353; GFX7LESS-NEXT: s_mov_b32 s32, 0 2354; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2355; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2356; GFX7LESS-NEXT: s_mov_b32 s38, -1 2357; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 2358; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 2359; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 2360; GFX7LESS-NEXT: s_mov_b32 s14, s10 2361; GFX7LESS-NEXT: s_mov_b32 s13, s9 2362; GFX7LESS-NEXT: s_mov_b32 s12, s8 2363; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[6:7] 2364; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5] 2365; GFX7LESS-NEXT: s_add_u32 s8, s34, 44 2366; GFX7LESS-NEXT: s_addc_u32 s9, s35, 0 2367; GFX7LESS-NEXT: s_getpc_b64 s[4:5] 2368; GFX7LESS-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2369; GFX7LESS-NEXT: s_addc_u32 s5, s5, 
div.float.value@gotpcrel32@hi+12 2370; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2371; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2372; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2373; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 2374; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 2375; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] 2376; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 2377; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] 2378; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] 2379; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 2380; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 2381; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 2382; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0x7fc00000 2383; GFX7LESS-NEXT: .LBB5_1: ; %ComputeLoop 2384; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 2385; GFX7LESS-NEXT: s_ff1_i32_b64 s2, s[0:1] 2386; GFX7LESS-NEXT: v_mul_f32_e32 v1, 1.0, v2 2387; GFX7LESS-NEXT: v_readlane_b32 s4, v0, s2 2388; GFX7LESS-NEXT: s_lshl_b64 s[2:3], 1, s2 2389; GFX7LESS-NEXT: v_mul_f32_e64 v2, 1.0, s4 2390; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 2391; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 2392; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 2393; GFX7LESS-NEXT: v_max_f32_e32 v2, v1, v2 2394; GFX7LESS-NEXT: s_cbranch_vccnz .LBB5_1 2395; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 2396; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 2397; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 2398; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2399; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 2400; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 2401; GFX7LESS-NEXT: s_cbranch_execz .LBB5_5 2402; GFX7LESS-NEXT: ; %bb.3: 2403; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x9 2404; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 2405; GFX7LESS-NEXT: s_mov_b32 s2, -1 2406; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 2407; GFX7LESS-NEXT: buffer_load_dword v1, off, s[0:3], 0 2408; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 2409; GFX7LESS-NEXT: v_mul_f32_e32 v2, 1.0, v2 2410; GFX7LESS-NEXT: .LBB5_4: ; 
%atomicrmw.start 2411; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 2412; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 2413; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 2414; GFX7LESS-NEXT: v_max_f32_e32 v0, v0, v2 2415; GFX7LESS-NEXT: s_waitcnt expcnt(0) 2416; GFX7LESS-NEXT: v_mov_b32_e32 v4, v1 2417; GFX7LESS-NEXT: v_mov_b32_e32 v3, v0 2418; GFX7LESS-NEXT: buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc 2419; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 2420; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 2421; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2422; GFX7LESS-NEXT: v_mov_b32_e32 v1, v3 2423; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 2424; GFX7LESS-NEXT: s_cbranch_execnz .LBB5_4 2425; GFX7LESS-NEXT: .LBB5_5: 2426; GFX7LESS-NEXT: s_endpgm 2427; 2428; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2429; GFX9: ; %bb.0: 2430; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2431; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2432; GFX9-NEXT: s_mov_b32 s38, -1 2433; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2434; GFX9-NEXT: s_add_u32 s36, s36, s11 2435; GFX9-NEXT: s_addc_u32 s37, s37, 0 2436; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] 2437; GFX9-NEXT: s_mov_b32 s12, s8 2438; GFX9-NEXT: s_add_u32 s8, s34, 44 2439; GFX9-NEXT: s_mov_b32 s13, s9 2440; GFX9-NEXT: s_addc_u32 s9, s35, 0 2441; GFX9-NEXT: s_getpc_b64 s[4:5] 2442; GFX9-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2443; GFX9-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2444; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2445; GFX9-NEXT: s_mov_b32 s14, s10 2446; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] 2447; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2448; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2449; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] 2450; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 2451; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2452; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 2453; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2454; GFX9-NEXT: s_mov_b32 s32, 0 2455; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2456; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 2457; GFX9-NEXT: s_mov_b64 s[0:1], exec 2458; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 2459; GFX9-NEXT: .LBB5_1: ; %ComputeLoop 2460; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2461; GFX9-NEXT: s_ff1_i32_b64 s2, s[0:1] 2462; GFX9-NEXT: v_readlane_b32 s4, v0, s2 2463; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s2 2464; GFX9-NEXT: v_max_f32_e32 v1, v2, v2 2465; GFX9-NEXT: v_max_f32_e64 v2, s4, s4 2466; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 2467; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 2468; GFX9-NEXT: v_max_f32_e32 v2, v1, v2 2469; GFX9-NEXT: s_cbranch_scc1 .LBB5_1 2470; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 2471; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2472; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2473; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2474; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 2475; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 2476; GFX9-NEXT: s_cbranch_execz .LBB5_5 2477; GFX9-NEXT: ; %bb.3: 2478; GFX9-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2479; GFX9-NEXT: v_mov_b32_e32 v3, 0 2480; GFX9-NEXT: s_mov_b64 s[2:3], 0 2481; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 2482; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2483; GFX9-NEXT: global_load_dword v1, v3, s[0:1] 2484; GFX9-NEXT: .LBB5_4: ; %atomicrmw.start 2485; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2486; GFX9-NEXT: s_waitcnt vmcnt(0) 2487; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 2488; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 2489; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc 2490; GFX9-NEXT: s_waitcnt vmcnt(0) 2491; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 2492; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 2493; GFX9-NEXT: v_mov_b32_e32 v1, v0 2494; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 2495; GFX9-NEXT: s_cbranch_execnz .LBB5_4 2496; GFX9-NEXT: .LBB5_5: 2497; GFX9-NEXT: s_endpgm 2498; 2499; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2500; GFX1064: ; %bb.0: 2501; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2502; GFX1064-NEXT: 
s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2503; GFX1064-NEXT: s_mov_b32 s38, -1 2504; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 2505; GFX1064-NEXT: s_add_u32 s36, s36, s11 2506; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 2507; GFX1064-NEXT: s_addc_u32 s37, s37, 0 2508; GFX1064-NEXT: s_mov_b32 s12, s8 2509; GFX1064-NEXT: s_add_u32 s8, s34, 44 2510; GFX1064-NEXT: s_mov_b32 s13, s9 2511; GFX1064-NEXT: s_addc_u32 s9, s35, 0 2512; GFX1064-NEXT: s_getpc_b64 s[4:5] 2513; GFX1064-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2514; GFX1064-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2515; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2516; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2517; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2518; GFX1064-NEXT: s_mov_b32 s14, s10 2519; GFX1064-NEXT: s_mov_b64 s[10:11], s[6:7] 2520; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] 2521; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 2522; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 2523; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] 2524; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] 2525; GFX1064-NEXT: s_mov_b32 s32, 0 2526; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 2527; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 2528; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 2529; GFX1064-NEXT: s_mov_b64 s[0:1], exec 2530; GFX1064-NEXT: .LBB5_1: ; %ComputeLoop 2531; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 2532; GFX1064-NEXT: s_ff1_i32_b64 s2, s[0:1] 2533; GFX1064-NEXT: v_max_f32_e32 v1, v1, v1 2534; GFX1064-NEXT: v_readlane_b32 s3, v0, s2 2535; GFX1064-NEXT: v_max_f32_e64 v2, s3, s3 2536; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s2 2537; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 2538; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 2539; GFX1064-NEXT: v_max_f32_e32 v1, v1, v2 2540; GFX1064-NEXT: s_cbranch_scc1 .LBB5_1 2541; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 2542; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2543; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2544; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 
2545; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 2546; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 2547; GFX1064-NEXT: s_cbranch_execz .LBB5_4 2548; GFX1064-NEXT: ; %bb.3: 2549; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2550; GFX1064-NEXT: v_mov_b32_e32 v0, 0 2551; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 2552; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 2553; GFX1064-NEXT: .LBB5_4: 2554; GFX1064-NEXT: s_endpgm 2555; 2556; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2557; GFX1032: ; %bb.0: 2558; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2559; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2560; GFX1032-NEXT: s_mov_b32 s38, -1 2561; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 2562; GFX1032-NEXT: s_add_u32 s36, s36, s11 2563; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 2564; GFX1032-NEXT: s_addc_u32 s37, s37, 0 2565; GFX1032-NEXT: s_mov_b32 s12, s8 2566; GFX1032-NEXT: s_add_u32 s8, s34, 44 2567; GFX1032-NEXT: s_mov_b32 s13, s9 2568; GFX1032-NEXT: s_addc_u32 s9, s35, 0 2569; GFX1032-NEXT: s_getpc_b64 s[4:5] 2570; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2571; GFX1032-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2572; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2573; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2574; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2575; GFX1032-NEXT: s_mov_b32 s14, s10 2576; GFX1032-NEXT: s_mov_b64 s[10:11], s[6:7] 2577; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] 2578; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] 2579; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 2580; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] 2581; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] 2582; GFX1032-NEXT: s_mov_b32 s32, 0 2583; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 2584; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 2585; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 2586; GFX1032-NEXT: s_mov_b32 s0, exec_lo 2587; GFX1032-NEXT: .LBB5_1: ; %ComputeLoop 2588; GFX1032-NEXT: ; =>This Inner Loop Header: 
Depth=1 2589; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 2590; GFX1032-NEXT: v_max_f32_e32 v1, v1, v1 2591; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 2592; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 2593; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 2594; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 2595; GFX1032-NEXT: v_max_f32_e64 v2, s2, s2 2596; GFX1032-NEXT: v_max_f32_e32 v1, v1, v2 2597; GFX1032-NEXT: s_cbranch_scc1 .LBB5_1 2598; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 2599; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2600; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 2601; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 2602; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 2603; GFX1032-NEXT: s_cbranch_execz .LBB5_4 2604; GFX1032-NEXT: ; %bb.3: 2605; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2606; GFX1032-NEXT: v_mov_b32_e32 v0, 0 2607; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 2608; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 2609; GFX1032-NEXT: .LBB5_4: 2610; GFX1032-NEXT: s_endpgm 2611; 2612; GFX1164-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2613; GFX1164: ; %bb.0: 2614; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 2615; GFX1164-NEXT: s_mov_b32 s12, s8 2616; GFX1164-NEXT: s_add_u32 s8, s34, 44 2617; GFX1164-NEXT: s_mov_b32 s13, s9 2618; GFX1164-NEXT: s_addc_u32 s9, s35, 0 2619; GFX1164-NEXT: s_getpc_b64 s[4:5] 2620; GFX1164-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2621; GFX1164-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2622; GFX1164-NEXT: v_mov_b32_e32 v31, v0 2623; GFX1164-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 2624; GFX1164-NEXT: s_mov_b32 s14, s10 2625; GFX1164-NEXT: s_mov_b64 s[10:11], s[6:7] 2626; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] 2627; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 2628; GFX1164-NEXT: s_mov_b32 s32, 0 2629; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 2630; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] 2631; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 2632; GFX1164-NEXT: s_mov_b64 s[0:1], exec 2633; GFX1164-NEXT: .LBB5_1: ; 
%ComputeLoop 2634; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 2635; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2636; GFX1164-NEXT: s_ctz_i32_b64 s2, s[0:1] 2637; GFX1164-NEXT: v_max_f32_e32 v1, v1, v1 2638; GFX1164-NEXT: v_readlane_b32 s3, v0, s2 2639; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 2640; GFX1164-NEXT: v_max_f32_e64 v2, s3, s3 2641; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s2 2642; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 2643; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2644; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 2645; GFX1164-NEXT: v_max_f32_e32 v1, v1, v2 2646; GFX1164-NEXT: s_cbranch_scc1 .LBB5_1 2647; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 2648; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2649; GFX1164-NEXT: s_mov_b64 s[0:1], exec 2650; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2651; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 2652; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 2653; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 2654; GFX1164-NEXT: s_cbranch_execz .LBB5_4 2655; GFX1164-NEXT: ; %bb.3: 2656; GFX1164-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 2657; GFX1164-NEXT: v_mov_b32_e32 v0, 0 2658; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 2659; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2660; GFX1164-NEXT: .LBB5_4: 2661; GFX1164-NEXT: s_endpgm 2662; 2663; GFX1132-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2664; GFX1132: ; %bb.0: 2665; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] 2666; GFX1132-NEXT: v_mov_b32_e32 v31, v0 2667; GFX1132-NEXT: s_add_u32 s8, s34, 44 2668; GFX1132-NEXT: s_addc_u32 s9, s35, 0 2669; GFX1132-NEXT: s_getpc_b64 s[4:5] 2670; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2671; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2672; GFX1132-NEXT: s_mov_b32 s12, s13 2673; GFX1132-NEXT: 
s_load_b64 s[16:17], s[4:5], 0x0 2674; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] 2675; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] 2676; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 2677; GFX1132-NEXT: s_mov_b32 s13, s14 2678; GFX1132-NEXT: s_mov_b32 s14, s15 2679; GFX1132-NEXT: s_mov_b32 s32, 0 2680; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 2681; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] 2682; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 2683; GFX1132-NEXT: s_mov_b32 s0, exec_lo 2684; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop 2685; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 2686; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2687; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 2688; GFX1132-NEXT: v_max_f32_e32 v1, v1, v1 2689; GFX1132-NEXT: v_readlane_b32 s2, v0, s1 2690; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 2691; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 2692; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 2693; GFX1132-NEXT: s_cmp_lg_u32 s0, 0 2694; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2695; GFX1132-NEXT: v_max_f32_e64 v2, s2, s2 2696; GFX1132-NEXT: v_max_f32_e32 v1, v1, v2 2697; GFX1132-NEXT: s_cbranch_scc1 .LBB5_1 2698; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 2699; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2700; GFX1132-NEXT: s_mov_b32 s0, exec_lo 2701; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 2702; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 2703; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 2704; GFX1132-NEXT: s_cbranch_execz .LBB5_4 2705; GFX1132-NEXT: ; %bb.3: 2706; GFX1132-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 2707; GFX1132-NEXT: v_mov_b32_e32 v0, 0 2708; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 2709; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 2710; GFX1132-NEXT: .LBB5_4: 2711; GFX1132-NEXT: s_endpgm 2712; 2713; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2714; GFX7LESS-DPP: ; %bb.0: 2715; GFX7LESS-DPP-NEXT: 
s_mov_b32 s32, 0 2716; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 2717; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 2718; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 2719; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 2720; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 2721; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 2722; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 2723; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 2724; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 2725; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 2726; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 2727; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 2728; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 2729; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 2730; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 2731; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] 2732; GFX7LESS-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2733; GFX7LESS-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2734; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2735; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2736; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2737; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 2738; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 2739; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 2740; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 2741; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] 2742; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] 2743; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 2744; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 2745; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 2746; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 2747; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 2748; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start 2749; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 2750; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 2751; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 2752; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, v0, v2 2753; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 2754; 
GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 2755; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 2756; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc 2757; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 2758; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 2759; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 2760; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v3 2761; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[0:1] 2762; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB5_1 2763; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 2764; GFX7LESS-DPP-NEXT: s_endpgm 2765; 2766; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2767; GFX9-DPP: ; %bb.0: 2768; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2769; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2770; GFX9-DPP-NEXT: s_mov_b32 s38, -1 2771; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 2772; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 2773; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 2774; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 2775; GFX9-DPP-NEXT: s_mov_b32 s12, s8 2776; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 2777; GFX9-DPP-NEXT: s_mov_b32 s13, s9 2778; GFX9-DPP-NEXT: s_addc_u32 s9, s35, 0 2779; GFX9-DPP-NEXT: s_getpc_b64 s[4:5] 2780; GFX9-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2781; GFX9-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2782; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2783; GFX9-DPP-NEXT: s_mov_b32 s14, s10 2784; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 2785; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2786; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2787; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 2788; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 2789; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 2790; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 2791; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 2792; GFX9-DPP-NEXT: s_mov_b32 s32, 0 2793; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 2794; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 2795; GFX9-DPP-NEXT: 
v_mbcnt_lo_u32_b32 v1, exec_lo, 0 2796; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 2797; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 2798; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 2799; GFX9-DPP-NEXT: v_cndmask_b32_e64 v4, v3, v0, s[0:1] 2800; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2801; GFX9-DPP-NEXT: s_nop 1 2802; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf 2803; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 2804; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 2805; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 2806; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2807; GFX9-DPP-NEXT: s_nop 1 2808; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf 2809; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 2810; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 2811; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2812; GFX9-DPP-NEXT: s_nop 1 2813; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf 2814; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 2815; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 2816; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2817; GFX9-DPP-NEXT: s_nop 1 2818; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf 2819; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 2820; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 2821; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2822; GFX9-DPP-NEXT: s_nop 1 2823; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf 2824; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 2825; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 2826; GFX9-DPP-NEXT: s_nop 1 2827; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf 2828; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 2829; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 2830; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 2831; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 2832; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2833; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 2834; GFX9-DPP-NEXT: s_cbranch_execz .LBB5_3 2835; 
GFX9-DPP-NEXT: ; %bb.1: 2836; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2837; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 2838; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 2839; GFX9-DPP-NEXT: v_max_f32_e64 v6, s4, s4 2840; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 2841; GFX9-DPP-NEXT: global_load_dword v1, v2, s[0:1] 2842; GFX9-DPP-NEXT: .LBB5_2: ; %atomicrmw.start 2843; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 2844; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 2845; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 2846; GFX9-DPP-NEXT: v_max_f32_e32 v0, v0, v6 2847; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 2848; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 2849; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 2850; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 2851; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 2852; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 2853; GFX9-DPP-NEXT: s_cbranch_execnz .LBB5_2 2854; GFX9-DPP-NEXT: .LBB5_3: 2855; GFX9-DPP-NEXT: s_endpgm 2856; 2857; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2858; GFX1064-DPP: ; %bb.0: 2859; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2860; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2861; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 2862; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 2863; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 2864; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 2865; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 2866; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 2867; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 2868; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 2869; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 2870; GFX1064-DPP-NEXT: s_getpc_b64 s[4:5] 2871; GFX1064-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2872; GFX1064-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2873; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2874; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2875; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2876; GFX1064-DPP-NEXT: 
s_mov_b32 s14, s10 2877; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 2878; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 2879; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 2880; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 2881; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 2882; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 2883; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 2884; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 2885; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 2886; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 2887; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 2888; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s[0:1] 2889; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2890; GFX1064-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 2891; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 2892; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v3 2893; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 2894; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 2895; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2896; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2897; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2898; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 2899; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2900; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2901; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2902; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 2903; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2904; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2905; GFX1064-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 2906; GFX1064-DPP-NEXT: v_max_f32_e32 v4, v4, v4 2907; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2908; GFX1064-DPP-NEXT: v_readlane_b32 s2, v3, 32 2909; GFX1064-DPP-NEXT: v_readlane_b32 s3, v3, 0 2910; GFX1064-DPP-NEXT: v_max_f32_e64 v3, s2, s2 2911; GFX1064-DPP-NEXT: v_max_f32_e64 v4, s3, s3 2912; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 2913; GFX1064-DPP-NEXT: 
v_mbcnt_lo_u32_b32 v0, exec_lo, 0 2914; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 2915; GFX1064-DPP-NEXT: v_max_f32_e32 v3, v4, v3 2916; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 2917; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 2918; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, v3 2919; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2920; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 2921; GFX1064-DPP-NEXT: s_cbranch_execz .LBB5_2 2922; GFX1064-DPP-NEXT: ; %bb.1: 2923; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2924; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 2925; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 2926; GFX1064-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 2927; GFX1064-DPP-NEXT: .LBB5_2: 2928; GFX1064-DPP-NEXT: s_endpgm 2929; 2930; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2931; GFX1032-DPP: ; %bb.0: 2932; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2933; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2934; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 2935; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 2936; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 2937; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 2938; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 2939; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 2940; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 2941; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 2942; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 2943; GFX1032-DPP-NEXT: s_getpc_b64 s[4:5] 2944; GFX1032-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 2945; GFX1032-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 2946; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 2947; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 2948; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 2949; GFX1032-DPP-NEXT: s_mov_b32 s14, s10 2950; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 2951; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 2952; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 2953; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, 
v1, v2 2954; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 2955; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 2956; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 2957; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 2958; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 2959; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 2960; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 2961; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, v0, s0 2962; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2963; GFX1032-DPP-NEXT: v_mov_b32_dpp v3, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 2964; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 2965; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v3 2966; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v4, v3 2967; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 2968; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2969; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2970; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2971; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 2972; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2973; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 2974; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2975; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 2976; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v5, v5 2977; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2978; GFX1032-DPP-NEXT: v_permlanex16_b32 v4, v3, 0, 0 2979; GFX1032-DPP-NEXT: v_max_f32_e32 v4, v4, v4 2980; GFX1032-DPP-NEXT: v_max_f32_e32 v3, v3, v4 2981; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 2982; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 2983; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 2984; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 2985; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 2986; GFX1032-DPP-NEXT: s_cbranch_execz .LBB5_2 2987; GFX1032-DPP-NEXT: ; %bb.1: 2988; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 2989; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 2990; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 
2991; GFX1032-DPP-NEXT: global_atomic_fmax v1, v0, s[0:1] 2992; GFX1032-DPP-NEXT: .LBB5_2: 2993; GFX1032-DPP-NEXT: s_endpgm 2994; 2995; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 2996; GFX1164-DPP: ; %bb.0: 2997; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 2998; GFX1164-DPP-NEXT: s_mov_b32 s12, s8 2999; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 3000; GFX1164-DPP-NEXT: s_mov_b32 s13, s9 3001; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 3002; GFX1164-DPP-NEXT: s_getpc_b64 s[4:5] 3003; GFX1164-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 3004; GFX1164-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 3005; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 3006; GFX1164-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 3007; GFX1164-DPP-NEXT: s_mov_b32 s14, s10 3008; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 3009; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 3010; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 3011; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 3012; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 3013; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3014; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 3015; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 3016; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s[0:1] 3017; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3018; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 3019; GFX1164-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 3020; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 3021; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v1 3022; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3023; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v2, v1 3024; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 3025; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 3026; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 3027; 
GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3028; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3029; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3030; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 3031; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 3032; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3033; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3034; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3035; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 3036; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3037; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v3, v3 3038; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3039; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3040; GFX1164-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 3041; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 3042; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3043; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3044; GFX1164-DPP-NEXT: v_permlane64_b32 v2, v1 3045; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 3046; GFX1164-DPP-NEXT: v_max_f32_e32 v2, v2, v2 3047; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 3048; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 3049; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 3050; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 3051; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3052; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 3053; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 3054; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0 3055; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 3056; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1 3057; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 3058; GFX1164-DPP-NEXT: 
v_cmpx_eq_u32_e32 0, v4 3059; GFX1164-DPP-NEXT: s_cbranch_execz .LBB5_2 3060; GFX1164-DPP-NEXT: ; %bb.1: 3061; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 3062; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 3063; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 3064; GFX1164-DPP-NEXT: global_atomic_max_f32 v4, v0, s[0:1] 3065; GFX1164-DPP-NEXT: .LBB5_2: 3066; GFX1164-DPP-NEXT: s_endpgm 3067; 3068; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: 3069; GFX1132-DPP: ; %bb.0: 3070; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 3071; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 3072; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 3073; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 3074; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] 3075; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 3076; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 3077; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 3078; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 3079; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 3080; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 3081; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 3082; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 3083; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 3084; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 3085; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 3086; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3087; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 3088; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 3089; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 3090; GFX1132-DPP-NEXT: v_mov_b32_e32 v1, 0x7fc00000 3091; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3092; GFX1132-DPP-NEXT: v_mov_b32_dpp v1, v2 row_xmask:1 row_mask:0xf bank_mask:0xf 3093; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3094; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1 3095; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v2, v1 3096; 
GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3097; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:2 row_mask:0xf bank_mask:0xf 3098; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 3099; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3100; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3101; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:4 row_mask:0xf bank_mask:0xf 3102; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3103; GFX1132-DPP-NEXT: v_dual_max_f32 v2, v3, v3 :: v_dual_mov_b32 v3, 0x7fc00000 3104; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3105; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3106; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v1 row_xmask:8 row_mask:0xf bank_mask:0xf 3107; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v3, v3 3108; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3109; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3110; GFX1132-DPP-NEXT: v_permlanex16_b32 v2, v1, 0, 0 3111; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3112; GFX1132-DPP-NEXT: v_max_f32_e32 v2, v2, v2 3113; GFX1132-DPP-NEXT: v_max_f32_e32 v1, v1, v2 3114; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 3115; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 3116; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v4, exec_lo, 0 3117; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, v1 3118; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 3119; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 3120; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4 3121; GFX1132-DPP-NEXT: s_cbranch_execz .LBB5_2 3122; GFX1132-DPP-NEXT: ; %bb.1: 3123; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 3124; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0 3125; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 3126; GFX1132-DPP-NEXT: 
global_atomic_max_f32 v4, v0, s[0:1] 3127; GFX1132-DPP-NEXT: .LBB5_2: 3128; GFX1132-DPP-NEXT: s_endpgm 3129 %divValue = call float @div.float.value() 3130 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %divValue monotonic, align 4, !amdgpu.no.fine.grained.memory !1 3131 ret void 3132} 3133 3134define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 { 3135; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3136; GFX7LESS: ; %bb.0: 3137; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 3138; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3139; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3140; GFX7LESS-NEXT: s_mov_b32 s50, -1 3141; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 3142; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 3143; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 3144; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] 3145; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 3146; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 3147; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3148; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 3149; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3 3150; GFX7LESS-NEXT: ; %bb.1: 3151; GFX7LESS-NEXT: s_mov_b32 s33, s10 3152; GFX7LESS-NEXT: s_mov_b32 s42, s9 3153; GFX7LESS-NEXT: s_mov_b32 s43, s8 3154; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] 3155; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] 3156; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] 3157; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 3158; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 3159; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3160; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 3161; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 3162; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 3163; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 3164; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 3165; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0 3166; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1 3167; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2 3168; 
GFX7LESS-NEXT: .LBB6_2: ; %atomicrmw.start 3169; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 3170; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 3171; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 3172; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 3173; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 3174; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 3175; GFX7LESS-NEXT: s_waitcnt expcnt(0) 3176; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 3177; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 3178; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 3179; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3180; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3181; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 3182; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 3183; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3184; GFX7LESS-NEXT: s_waitcnt expcnt(0) 3185; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 3186; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 3187; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0 3188; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 3189; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 3190; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 3191; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 3192; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] 3193; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 3194; GFX7LESS-NEXT: s_mov_b32 s12, s43 3195; GFX7LESS-NEXT: s_mov_b32 s13, s42 3196; GFX7LESS-NEXT: s_mov_b32 s14, s33 3197; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 3198; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 3199; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 3200; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 3201; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 3202; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 3203; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 3204; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 3205; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 3206; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 3207; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, 
v2 3208; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3209; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] 3210; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2 3211; GFX7LESS-NEXT: .LBB6_3: 3212; GFX7LESS-NEXT: s_endpgm 3213; 3214; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3215; GFX9: ; %bb.0: 3216; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3217; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3218; GFX9-NEXT: s_mov_b32 s50, -1 3219; GFX9-NEXT: s_mov_b32 s51, 0xe00000 3220; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3221; GFX9-NEXT: s_add_u32 s48, s48, s11 3222; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 3223; GFX9-NEXT: s_addc_u32 s49, s49, 0 3224; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] 3225; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3226; GFX9-NEXT: s_movk_i32 s32, 0x800 3227; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 3228; GFX9-NEXT: s_cbranch_execz .LBB6_3 3229; GFX9-NEXT: ; %bb.1: 3230; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 3231; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3232; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3233; GFX9-NEXT: s_mov_b32 s33, s10 3234; GFX9-NEXT: s_mov_b32 s42, s9 3235; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3236; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3237; GFX9-NEXT: s_mov_b32 s43, s8 3238; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] 3239; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] 3240; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] 3241; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3242; GFX9-NEXT: v_mov_b32_e32 v2, s1 3243; GFX9-NEXT: s_mov_b64 s[46:47], 0 3244; GFX9-NEXT: v_mov_b32_e32 v1, s0 3245; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 3246; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start 3247; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 3248; GFX9-NEXT: s_waitcnt vmcnt(0) 3249; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3250; GFX9-NEXT: s_add_u32 s8, s36, 44 3251; GFX9-NEXT: s_addc_u32 s9, s37, 0 3252; GFX9-NEXT: s_getpc_b64 s[0:1] 3253; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3254; 
GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3255; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3256; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 3257; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3258; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 3259; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 3260; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3261; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] 3262; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 3263; GFX9-NEXT: s_mov_b32 s12, s43 3264; GFX9-NEXT: s_mov_b32 s13, s42 3265; GFX9-NEXT: s_mov_b32 s14, s33 3266; GFX9-NEXT: v_mov_b32_e32 v31, v40 3267; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 3268; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3269; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 3270; GFX9-NEXT: v_mov_b32_e32 v0, 8 3271; GFX9-NEXT: v_mov_b32_e32 v1, 0 3272; GFX9-NEXT: v_mov_b32_e32 v2, s44 3273; GFX9-NEXT: v_mov_b32_e32 v3, s45 3274; GFX9-NEXT: v_mov_b32_e32 v4, 0 3275; GFX9-NEXT: v_mov_b32_e32 v5, 8 3276; GFX9-NEXT: v_mov_b32_e32 v6, 0 3277; GFX9-NEXT: v_mov_b32_e32 v7, 0 3278; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3279; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 3280; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 3281; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3282; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 3283; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3284; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3285; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] 3286; GFX9-NEXT: s_cbranch_execnz .LBB6_2 3287; GFX9-NEXT: .LBB6_3: 3288; GFX9-NEXT: s_endpgm 3289; 3290; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3291; GFX1064: ; %bb.0: 3292; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3293; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3294; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3295; GFX1064-NEXT: s_mov_b32 s50, -1 3296; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 3297; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, 
exec_hi, v3 3298; GFX1064-NEXT: s_add_u32 s48, s48, s11 3299; GFX1064-NEXT: s_addc_u32 s49, s49, 0 3300; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] 3301; GFX1064-NEXT: s_movk_i32 s32, 0x800 3302; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3303; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 3304; GFX1064-NEXT: s_cbranch_execz .LBB6_3 3305; GFX1064-NEXT: ; %bb.1: 3306; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 3307; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3308; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3309; GFX1064-NEXT: s_mov_b32 s33, s10 3310; GFX1064-NEXT: s_mov_b32 s42, s9 3311; GFX1064-NEXT: s_mov_b32 s43, s8 3312; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] 3313; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 3314; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] 3315; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] 3316; GFX1064-NEXT: s_mov_b64 s[46:47], 0 3317; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 3318; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3319; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 3320; GFX1064-NEXT: v_mov_b32_e32 v2, s1 3321; GFX1064-NEXT: v_mov_b32_e32 v1, s0 3322; GFX1064-NEXT: .LBB6_2: ; %atomicrmw.start 3323; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 3324; GFX1064-NEXT: s_waitcnt vmcnt(0) 3325; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3326; GFX1064-NEXT: s_add_u32 s8, s36, 44 3327; GFX1064-NEXT: s_addc_u32 s9, s37, 0 3328; GFX1064-NEXT: s_getpc_b64 s[0:1] 3329; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3330; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3331; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3332; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 3333; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3334; GFX1064-NEXT: v_mov_b32_e32 v31, v40 3335; GFX1064-NEXT: v_mov_b32_e32 v0, 8 3336; GFX1064-NEXT: v_mov_b32_e32 v1, 0 3337; GFX1064-NEXT: v_mov_b32_e32 v2, s44 3338; GFX1064-NEXT: v_mov_b32_e32 v5, 8 3339; GFX1064-NEXT: v_mov_b32_e32 v6, 0 3340; 
GFX1064-NEXT: v_mov_b32_e32 v7, 0 3341; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 3342; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 3343; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] 3344; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] 3345; GFX1064-NEXT: s_mov_b32 s12, s43 3346; GFX1064-NEXT: s_mov_b32 s13, s42 3347; GFX1064-NEXT: s_mov_b32 s14, s33 3348; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 3349; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3350; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3351; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 3352; GFX1064-NEXT: v_mov_b32_e32 v3, s45 3353; GFX1064-NEXT: v_mov_b32_e32 v4, 0 3354; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 3355; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 3356; GFX1064-NEXT: s_clause 0x1 3357; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 3358; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3359; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 3360; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3361; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3362; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] 3363; GFX1064-NEXT: s_cbranch_execnz .LBB6_2 3364; GFX1064-NEXT: .LBB6_3: 3365; GFX1064-NEXT: s_endpgm 3366; 3367; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3368; GFX1032: ; %bb.0: 3369; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3370; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3371; GFX1032-NEXT: s_mov_b32 s50, -1 3372; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3373; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 3374; GFX1032-NEXT: s_add_u32 s48, s48, s11 3375; GFX1032-NEXT: s_addc_u32 s49, s49, 0 3376; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] 3377; GFX1032-NEXT: s_mov_b32 s46, 0 3378; GFX1032-NEXT: s_movk_i32 s32, 0x400 3379; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 3380; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 3381; GFX1032-NEXT: s_cbranch_execz .LBB6_3 3382; GFX1032-NEXT: ; %bb.1: 3383; GFX1032-NEXT: 
s_load_dwordx2 s[44:45], s[4:5], 0x24 3384; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3385; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3386; GFX1032-NEXT: s_mov_b32 s33, s10 3387; GFX1032-NEXT: s_mov_b32 s42, s9 3388; GFX1032-NEXT: s_mov_b32 s43, s8 3389; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] 3390; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 3391; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] 3392; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] 3393; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 3394; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3395; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 3396; GFX1032-NEXT: v_mov_b32_e32 v2, s1 3397; GFX1032-NEXT: v_mov_b32_e32 v1, s0 3398; GFX1032-NEXT: .LBB6_2: ; %atomicrmw.start 3399; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 3400; GFX1032-NEXT: s_waitcnt vmcnt(0) 3401; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3402; GFX1032-NEXT: s_add_u32 s8, s36, 44 3403; GFX1032-NEXT: s_addc_u32 s9, s37, 0 3404; GFX1032-NEXT: s_getpc_b64 s[0:1] 3405; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3406; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3407; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3408; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 3409; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3410; GFX1032-NEXT: v_mov_b32_e32 v31, v40 3411; GFX1032-NEXT: v_mov_b32_e32 v0, 8 3412; GFX1032-NEXT: v_mov_b32_e32 v1, 0 3413; GFX1032-NEXT: v_mov_b32_e32 v2, s44 3414; GFX1032-NEXT: v_mov_b32_e32 v5, 8 3415; GFX1032-NEXT: v_mov_b32_e32 v6, 0 3416; GFX1032-NEXT: v_mov_b32_e32 v7, 0 3417; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 3418; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 3419; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] 3420; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] 3421; GFX1032-NEXT: s_mov_b32 s12, s43 3422; GFX1032-NEXT: s_mov_b32 s13, s42 3423; GFX1032-NEXT: s_mov_b32 s14, s33 3424; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 3425; GFX1032-NEXT: v_max_f64 
v[3:4], v[3:4], 4.0 3426; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3427; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 3428; GFX1032-NEXT: v_mov_b32_e32 v3, s45 3429; GFX1032-NEXT: v_mov_b32_e32 v4, 0 3430; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 3431; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 3432; GFX1032-NEXT: s_clause 0x1 3433; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 3434; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3435; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 3436; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3437; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 3438; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 3439; GFX1032-NEXT: s_cbranch_execnz .LBB6_2 3440; GFX1032-NEXT: .LBB6_3: 3441; GFX1032-NEXT: s_endpgm 3442; 3443; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3444; GFX1164: ; %bb.0: 3445; GFX1164-NEXT: v_mov_b32_e32 v40, v0 3446; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 3447; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] 3448; GFX1164-NEXT: s_mov_b32 s32, 32 3449; GFX1164-NEXT: s_mov_b64 s[0:1], exec 3450; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3451; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 3452; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 3453; GFX1164-NEXT: s_cbranch_execz .LBB6_3 3454; GFX1164-NEXT: ; %bb.1: 3455; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 3456; GFX1164-NEXT: s_mov_b32 s33, s10 3457; GFX1164-NEXT: s_mov_b32 s42, s9 3458; GFX1164-NEXT: s_mov_b32 s43, s8 3459; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] 3460; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] 3461; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] 3462; GFX1164-NEXT: s_mov_b64 s[46:47], 0 3463; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 3464; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 3465; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 3466; GFX1164-NEXT: v_mov_b32_e32 v2, s1 3467; GFX1164-NEXT: v_mov_b32_e32 v1, s0 3468; GFX1164-NEXT: 
s_set_inst_prefetch_distance 0x1 3469; GFX1164-NEXT: .p2align 6 3470; GFX1164-NEXT: .LBB6_2: ; %atomicrmw.start 3471; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 3472; GFX1164-NEXT: s_waitcnt vmcnt(0) 3473; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) 3474; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3475; GFX1164-NEXT: s_add_u32 s8, s36, 44 3476; GFX1164-NEXT: s_addc_u32 s9, s37, 0 3477; GFX1164-NEXT: s_getpc_b64 s[0:1] 3478; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3479; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3480; GFX1164-NEXT: v_mov_b32_e32 v31, v40 3481; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 3482; GFX1164-NEXT: v_mov_b32_e32 v0, 8 3483; GFX1164-NEXT: v_mov_b32_e32 v5, 8 3484; GFX1164-NEXT: v_mov_b32_e32 v6, 0 3485; GFX1164-NEXT: v_mov_b32_e32 v7, 0 3486; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 3487; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] 3488; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] 3489; GFX1164-NEXT: s_mov_b32 s12, s43 3490; GFX1164-NEXT: s_mov_b32 s13, s42 3491; GFX1164-NEXT: s_mov_b32 s14, s33 3492; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3493; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off 3494; GFX1164-NEXT: v_mov_b32_e32 v1, 0 3495; GFX1164-NEXT: v_mov_b32_e32 v2, s44 3496; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 3497; GFX1164-NEXT: v_mov_b32_e32 v3, s45 3498; GFX1164-NEXT: v_mov_b32_e32 v4, 0 3499; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 3500; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 3501; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off 3502; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 3503; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 3504; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3505; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3506; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] 3507; GFX1164-NEXT: s_cbranch_execnz .LBB6_2 3508; GFX1164-NEXT: .LBB6_3: 3509; GFX1164-NEXT: 
s_set_inst_prefetch_distance 0x2 3510; GFX1164-NEXT: s_endpgm 3511; 3512; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3513; GFX1132: ; %bb.0: 3514; GFX1132-NEXT: v_mov_b32_e32 v40, v0 3515; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 3516; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] 3517; GFX1132-NEXT: s_mov_b32 s46, 0 3518; GFX1132-NEXT: s_mov_b32 s32, 32 3519; GFX1132-NEXT: s_mov_b32 s0, exec_lo 3520; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 3521; GFX1132-NEXT: s_cbranch_execz .LBB6_3 3522; GFX1132-NEXT: ; %bb.1: 3523; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 3524; GFX1132-NEXT: s_mov_b32 s33, s15 3525; GFX1132-NEXT: s_mov_b32 s42, s14 3526; GFX1132-NEXT: s_mov_b32 s43, s13 3527; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] 3528; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] 3529; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] 3530; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 3531; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 3532; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 3533; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 3534; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 3535; GFX1132-NEXT: .p2align 6 3536; GFX1132-NEXT: .LBB6_2: ; %atomicrmw.start 3537; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 3538; GFX1132-NEXT: s_waitcnt vmcnt(0) 3539; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 3540; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3541; GFX1132-NEXT: s_add_u32 s8, s36, 44 3542; GFX1132-NEXT: s_addc_u32 s9, s37, 0 3543; GFX1132-NEXT: s_getpc_b64 s[0:1] 3544; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3545; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3546; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 3547; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 3548; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 3549; GFX1132-NEXT: v_mov_b32_e32 v7, 0 3550; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 3551; GFX1132-NEXT: s_mov_b64 s[6:7], 
s[38:39] 3552; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] 3553; GFX1132-NEXT: s_mov_b32 s12, s43 3554; GFX1132-NEXT: s_mov_b32 s13, s42 3555; GFX1132-NEXT: s_mov_b32 s14, s33 3556; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) 3557; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3558; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off 3559; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 3560; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 3561; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 3562; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 3563; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] 3564; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off 3565; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 3566; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 3567; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3568; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 3569; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 3570; GFX1132-NEXT: s_cbranch_execnz .LBB6_2 3571; GFX1132-NEXT: .LBB6_3: 3572; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 3573; GFX1132-NEXT: s_endpgm 3574; 3575; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3576; GFX7LESS-DPP: ; %bb.0: 3577; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 3578; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3579; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3580; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 3581; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 3582; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 3583; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 3584; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3585; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 3586; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 3587; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3588; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 3589; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3 3590; GFX7LESS-DPP-NEXT: ; %bb.1: 3591; 
GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 3592; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 3593; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 3594; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 3595; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3596; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3597; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 3598; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 3599; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3600; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 3601; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 3602; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 3603; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 3604; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 3605; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0 3606; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1 3607; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2 3608; GFX7LESS-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3609; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3610; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 3611; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 3612; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 3613; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 3614; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 3615; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 3616; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 3617; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 3618; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 3619; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3620; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3621; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 3622; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 3623; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3624; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 3625; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 3626; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0 3627; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0 
3628; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 3629; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 3630; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 3631; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3632; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3633; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3634; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 3635; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 3636; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 3637; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 3638; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 3639; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 3640; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 3641; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 3642; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 3643; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3644; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 3645; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 3646; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 3647; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 3648; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3649; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 3650; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2 3651; GFX7LESS-DPP-NEXT: .LBB6_3: 3652; GFX7LESS-DPP-NEXT: s_endpgm 3653; 3654; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3655; GFX9-DPP: ; %bb.0: 3656; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3657; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3658; GFX9-DPP-NEXT: s_mov_b32 s50, -1 3659; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 3660; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3661; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 3662; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 3663; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 3664; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3665; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3666; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 3667; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 3668; GFX9-DPP-NEXT: s_cbranch_execz 
.LBB6_3 3669; GFX9-DPP-NEXT: ; %bb.1: 3670; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 3671; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3672; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3673; GFX9-DPP-NEXT: s_mov_b32 s33, s10 3674; GFX9-DPP-NEXT: s_mov_b32 s42, s9 3675; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 3676; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3677; GFX9-DPP-NEXT: s_mov_b32 s43, s8 3678; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 3679; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3680; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3681; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 3682; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 3683; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 3684; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 3685; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 3686; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3687; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3688; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 3689; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3690; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 3691; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 3692; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 3693; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3694; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3695; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3696; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 3697; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3698; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 3699; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3700; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3701; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3702; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3703; GFX9-DPP-NEXT: s_mov_b32 s12, s43 3704; GFX9-DPP-NEXT: s_mov_b32 s13, s42 3705; GFX9-DPP-NEXT: s_mov_b32 s14, s33 3706; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 3707; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 3708; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3709; 
GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 3710; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 3711; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 3712; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 3713; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 3714; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 3715; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 3716; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 3717; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 3718; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 3719; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3720; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 3721; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3722; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 3723; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3724; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3725; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 3726; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2 3727; GFX9-DPP-NEXT: .LBB6_3: 3728; GFX9-DPP-NEXT: s_endpgm 3729; 3730; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3731; GFX1064-DPP: ; %bb.0: 3732; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3733; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3734; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3735; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 3736; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 3737; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 3738; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 3739; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 3740; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3741; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 3742; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 3743; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 3744; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3 3745; GFX1064-DPP-NEXT: ; %bb.1: 3746; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 3747; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3748; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3749; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 3750; GFX1064-DPP-NEXT: 
s_mov_b32 s42, s9 3751; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 3752; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 3753; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 3754; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3755; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3756; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 3757; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 3758; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3759; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 3760; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 3761; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 3762; GFX1064-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3763; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3764; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) 3765; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3766; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44 3767; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0 3768; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 3769; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3770; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3771; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3772; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 3773; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3774; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 3775; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 3776; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 3777; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 3778; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 3779; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 3780; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 3781; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 3782; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3783; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3784; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3785; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 3786; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 3787; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 3788; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 3789; GFX1064-DPP-NEXT: v_max_f64 
v[3:4], v[3:4], 4.0 3790; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3791; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 3792; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 3793; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 3794; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 3795; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3796; GFX1064-DPP-NEXT: s_clause 0x1 3797; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 3798; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3799; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 3800; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3801; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 3802; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 3803; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2 3804; GFX1064-DPP-NEXT: .LBB6_3: 3805; GFX1064-DPP-NEXT: s_endpgm 3806; 3807; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3808; GFX1032-DPP: ; %bb.0: 3809; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 3810; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 3811; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 3812; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 3813; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 3814; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 3815; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 3816; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3817; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 3818; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 3819; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 3820; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 3821; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 3822; GFX1032-DPP-NEXT: ; %bb.1: 3823; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 3824; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 3825; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 3826; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 3827; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 3828; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 3829; GFX1032-DPP-NEXT: 
s_mov_b64 s[34:35], s[6:7] 3830; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 3831; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3832; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3833; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 3834; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 3835; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 3836; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 3837; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 3838; GFX1032-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3839; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3840; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) 3841; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3842; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44 3843; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0 3844; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 3845; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3846; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3847; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 3848; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 3849; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 3850; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 3851; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 3852; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 3853; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 3854; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 3855; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 3856; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 3857; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 3858; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3859; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3860; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3861; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 3862; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 3863; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 3864; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 3865; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3866; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 3867; GFX1032-DPP-NEXT: 
buffer_store_dword v3, off, s[48:51], 0 offset:8 3868; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 3869; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 3870; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 3871; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 3872; GFX1032-DPP-NEXT: s_clause 0x1 3873; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 3874; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 3875; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 3876; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3877; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 3878; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 3879; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2 3880; GFX1032-DPP-NEXT: .LBB6_3: 3881; GFX1032-DPP-NEXT: s_endpgm 3882; 3883; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3884; GFX1164-DPP: ; %bb.0: 3885; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 3886; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 3887; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3888; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 3889; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 3890; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3891; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 3892; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 3893; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3 3894; GFX1164-DPP-NEXT: ; %bb.1: 3895; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 3896; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 3897; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 3898; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 3899; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 3900; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3901; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3902; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 3903; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 3904; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 3905; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 3906; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 3907; GFX1164-DPP-NEXT: 
v_mov_b32_e32 v1, s0 3908; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 3909; GFX1164-DPP-NEXT: .p2align 6 3910; GFX1164-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3911; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3912; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 3913; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 3914; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3915; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44 3916; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0 3917; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 3918; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3919; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 3920; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 3921; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 3922; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 3923; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 3924; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 3925; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 3926; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3927; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3928; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3929; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 3930; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 3931; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 3932; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3933; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off 3934; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 3935; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 3936; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 3937; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 3938; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 3939; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 3940; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 3941; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off 3942; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 3943; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 3944; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3945; GFX1164-DPP-NEXT: s_or_b64 
s[46:47], vcc, s[46:47] 3946; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] 3947; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2 3948; GFX1164-DPP-NEXT: .LBB6_3: 3949; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 3950; GFX1164-DPP-NEXT: s_endpgm 3951; 3952; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: 3953; GFX1132-DPP: ; %bb.0: 3954; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 3955; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 3956; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 3957; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 3958; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 3959; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 3960; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 3961; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3 3962; GFX1132-DPP-NEXT: ; %bb.1: 3963; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 3964; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 3965; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 3966; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 3967; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 3968; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 3969; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 3970; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 3971; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 3972; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 3973; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 3974; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 3975; GFX1132-DPP-NEXT: .p2align 6 3976; GFX1132-DPP-NEXT: .LBB6_2: ; %atomicrmw.start 3977; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 3978; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 3979; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 3980; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 3981; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44 3982; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0 3983; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 3984; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 3985; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, 
__atomic_compare_exchange@gotpcrel32@hi+12 3986; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 3987; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 3988; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 3989; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 3990; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 3991; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 3992; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 3993; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 3994; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 3995; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 3996; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) 3997; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 3998; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off 3999; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 4000; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 4001; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 4002; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 4003; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 4004; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off 4005; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 4006; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 4007; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 4008; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 4009; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 4010; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 4011; GFX1132-DPP-NEXT: .LBB6_3: 4012; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 4013; GFX1132-DPP-NEXT: s_endpgm 4014 %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1 4015 ret void 4016} 4017 4018define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 { 4019; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4020; GFX7LESS: ; %bb.0: 4021; 
GFX7LESS-NEXT: s_movk_i32 s32, 0x800 4022; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4023; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4024; GFX7LESS-NEXT: s_mov_b32 s50, -1 4025; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 4026; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 4027; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 4028; GFX7LESS-NEXT: s_mov_b32 s33, s10 4029; GFX7LESS-NEXT: s_mov_b32 s42, s9 4030; GFX7LESS-NEXT: s_mov_b32 s43, s8 4031; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] 4032; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] 4033; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] 4034; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] 4035; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 4036; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 4037; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 4038; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4039; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4040; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4041; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4042; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4043; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 4044; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 4045; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 4046; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 4047; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 4048; GFX7LESS-NEXT: s_mov_b32 s12, s43 4049; GFX7LESS-NEXT: s_mov_b32 s13, s42 4050; GFX7LESS-NEXT: s_mov_b32 s14, s33 4051; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 4052; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 4053; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 4054; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 4055; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 4056; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 4057; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 4058; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4059; GFX7LESS-NEXT: .LBB7_1: ; %ComputeLoop 4060; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 4061; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1] 4062; GFX7LESS-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4063; 
GFX7LESS-NEXT: v_readlane_b32 s3, v1, s4 4064; GFX7LESS-NEXT: v_readlane_b32 s2, v0, s4 4065; GFX7LESS-NEXT: s_lshl_b64 s[4:5], 1, s4 4066; GFX7LESS-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4067; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 4068; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 4069; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 4070; GFX7LESS-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4071; GFX7LESS-NEXT: s_cbranch_vccnz .LBB7_1 4072; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 4073; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 4074; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 4075; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 4076; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 4077; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 4078; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5 4079; GFX7LESS-NEXT: ; %bb.3: 4080; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 4081; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 4082; GFX7LESS-NEXT: s_mov_b32 s46, -1 4083; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 4084; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 4085; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 4086; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4087; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start 4088; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 4089; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 4090; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 4091; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 4092; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 4093; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 4094; GFX7LESS-NEXT: s_waitcnt expcnt(0) 4095; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] 4096; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 4097; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 4098; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4099; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4100; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 4101; GFX7LESS-NEXT: 
buffer_store_dword v0, off, s[48:51], 0 offset:8 4102; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4103; GFX7LESS-NEXT: s_waitcnt expcnt(0) 4104; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 4105; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 4106; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0 4107; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 4108; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 4109; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 4110; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 4111; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] 4112; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 4113; GFX7LESS-NEXT: s_mov_b32 s12, s43 4114; GFX7LESS-NEXT: s_mov_b32 s13, s42 4115; GFX7LESS-NEXT: s_mov_b32 s14, s33 4116; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 4117; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 4118; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 4119; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 4120; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 4121; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 4122; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 4123; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 4124; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 4125; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 4126; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 4127; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 4128; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] 4129; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4 4130; GFX7LESS-NEXT: .LBB7_5: 4131; GFX7LESS-NEXT: s_endpgm 4132; 4133; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4134; GFX9: ; %bb.0: 4135; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4136; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4137; GFX9-NEXT: s_mov_b32 s50, -1 4138; GFX9-NEXT: s_mov_b32 s51, 0xe00000 4139; GFX9-NEXT: s_add_u32 s48, s48, s11 4140; GFX9-NEXT: s_addc_u32 s49, s49, 0 4141; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] 4142; GFX9-NEXT: s_mov_b32 s43, s8 4143; GFX9-NEXT: s_add_u32 s8, s36, 44 4144; GFX9-NEXT: s_mov_b32 s42, s9 4145; GFX9-NEXT: s_addc_u32 s9, s37, 0 4146; 
GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] 4147; GFX9-NEXT: s_getpc_b64 s[0:1] 4148; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4149; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4150; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4151; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4152; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4153; GFX9-NEXT: s_mov_b32 s33, s10 4154; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] 4155; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] 4156; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 4157; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 4158; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 4159; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 4160; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 4161; GFX9-NEXT: s_mov_b32 s12, s43 4162; GFX9-NEXT: s_mov_b32 s13, s42 4163; GFX9-NEXT: s_mov_b32 s14, s33 4164; GFX9-NEXT: v_mov_b32_e32 v31, v40 4165; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 4166; GFX9-NEXT: s_movk_i32 s32, 0x800 4167; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4168; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 4169; GFX9-NEXT: v_mov_b32_e32 v2, 0 4170; GFX9-NEXT: s_mov_b64 s[0:1], exec 4171; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4172; GFX9-NEXT: .LBB7_1: ; %ComputeLoop 4173; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4174; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1] 4175; GFX9-NEXT: v_readlane_b32 s3, v1, s4 4176; GFX9-NEXT: v_readlane_b32 s2, v0, s4 4177; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4178; GFX9-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4179; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4 4180; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 4181; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 4182; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4183; GFX9-NEXT: s_cbranch_scc1 .LBB7_1 4184; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 4185; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4186; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 4187; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 4188; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 4189; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 4190; GFX9-NEXT: 
s_cbranch_execz .LBB7_5 4191; GFX9-NEXT: ; %bb.3: 4192; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 4193; GFX9-NEXT: v_mov_b32_e32 v0, 0 4194; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4195; GFX9-NEXT: s_mov_b64 s[46:47], 0 4196; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4197; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 4198; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start 4199; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4200; GFX9-NEXT: s_waitcnt vmcnt(0) 4201; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 4202; GFX9-NEXT: s_add_u32 s8, s36, 44 4203; GFX9-NEXT: s_addc_u32 s9, s37, 0 4204; GFX9-NEXT: s_getpc_b64 s[0:1] 4205; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4206; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4207; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4208; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 4209; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 4210; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 4211; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 4212; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4213; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] 4214; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 4215; GFX9-NEXT: s_mov_b32 s12, s43 4216; GFX9-NEXT: s_mov_b32 s13, s42 4217; GFX9-NEXT: s_mov_b32 s14, s33 4218; GFX9-NEXT: v_mov_b32_e32 v31, v40 4219; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 4220; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 4221; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 4222; GFX9-NEXT: v_mov_b32_e32 v0, 8 4223; GFX9-NEXT: v_mov_b32_e32 v1, 0 4224; GFX9-NEXT: v_mov_b32_e32 v2, s44 4225; GFX9-NEXT: v_mov_b32_e32 v3, s45 4226; GFX9-NEXT: v_mov_b32_e32 v4, 0 4227; GFX9-NEXT: v_mov_b32_e32 v5, 8 4228; GFX9-NEXT: v_mov_b32_e32 v6, 0 4229; GFX9-NEXT: v_mov_b32_e32 v7, 0 4230; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4231; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 4232; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 4233; GFX9-NEXT: buffer_load_dword v5, 
off, s[48:51], 0 offset:4 4234; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 4235; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 4236; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 4237; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] 4238; GFX9-NEXT: s_cbranch_execnz .LBB7_4 4239; GFX9-NEXT: .LBB7_5: 4240; GFX9-NEXT: s_endpgm 4241; 4242; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4243; GFX1064: ; %bb.0: 4244; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4245; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4246; GFX1064-NEXT: s_mov_b32 s50, -1 4247; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 4248; GFX1064-NEXT: s_add_u32 s48, s48, s11 4249; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 4250; GFX1064-NEXT: s_addc_u32 s49, s49, 0 4251; GFX1064-NEXT: s_mov_b32 s43, s8 4252; GFX1064-NEXT: s_add_u32 s8, s34, 44 4253; GFX1064-NEXT: s_mov_b32 s42, s9 4254; GFX1064-NEXT: s_addc_u32 s9, s35, 0 4255; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] 4256; GFX1064-NEXT: s_getpc_b64 s[0:1] 4257; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4258; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4259; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4260; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4261; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4262; GFX1064-NEXT: s_mov_b32 s33, s10 4263; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] 4264; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] 4265; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 4266; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 4267; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 4268; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 4269; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] 4270; GFX1064-NEXT: s_mov_b32 s12, s43 4271; GFX1064-NEXT: v_mov_b32_e32 v31, v40 4272; GFX1064-NEXT: s_mov_b32 s13, s42 4273; GFX1064-NEXT: s_mov_b32 s14, s33 4274; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 4275; GFX1064-NEXT: s_movk_i32 s32, 0x800 4276; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 4277; GFX1064-NEXT: s_swappc_b64 s[30:31], 
s[16:17] 4278; GFX1064-NEXT: v_mov_b32_e32 v2, 0 4279; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4280; GFX1064-NEXT: s_mov_b64 s[0:1], exec 4281; GFX1064-NEXT: .LBB7_1: ; %ComputeLoop 4282; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 4283; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1] 4284; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4285; GFX1064-NEXT: v_readlane_b32 s3, v1, s4 4286; GFX1064-NEXT: v_readlane_b32 s2, v0, s4 4287; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4288; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4 4289; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 4290; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 4291; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4292; GFX1064-NEXT: s_cbranch_scc1 .LBB7_1 4293; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 4294; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4295; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 4296; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 4297; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 4298; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 4299; GFX1064-NEXT: s_cbranch_execz .LBB7_5 4300; GFX1064-NEXT: ; %bb.3: 4301; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 4302; GFX1064-NEXT: v_mov_b32_e32 v0, 0 4303; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4304; GFX1064-NEXT: s_mov_b64 s[46:47], 0 4305; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 4306; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 4307; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start 4308; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 4309; GFX1064-NEXT: s_waitcnt vmcnt(0) 4310; GFX1064-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 4311; GFX1064-NEXT: s_add_u32 s8, s34, 44 4312; GFX1064-NEXT: s_addc_u32 s9, s35, 0 4313; GFX1064-NEXT: s_getpc_b64 s[0:1] 4314; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4315; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4316; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 4317; GFX1064-NEXT: buffer_store_dword v4, off, 
s[48:51], 0 4318; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4319; GFX1064-NEXT: v_mov_b32_e32 v31, v40 4320; GFX1064-NEXT: v_mov_b32_e32 v2, s44 4321; GFX1064-NEXT: v_mov_b32_e32 v3, s45 4322; GFX1064-NEXT: v_mov_b32_e32 v4, 0 4323; GFX1064-NEXT: v_mov_b32_e32 v5, 8 4324; GFX1064-NEXT: v_mov_b32_e32 v6, 0 4325; GFX1064-NEXT: v_mov_b32_e32 v7, 0 4326; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 4327; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 4328; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] 4329; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] 4330; GFX1064-NEXT: s_mov_b32 s12, s43 4331; GFX1064-NEXT: s_mov_b32 s13, s42 4332; GFX1064-NEXT: s_mov_b32 s14, s33 4333; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 4334; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4335; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 4336; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 4337; GFX1064-NEXT: v_mov_b32_e32 v0, 8 4338; GFX1064-NEXT: v_mov_b32_e32 v1, 0 4339; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 4340; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 4341; GFX1064-NEXT: s_clause 0x1 4342; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 4343; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 4344; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 4345; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 4346; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 4347; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] 4348; GFX1064-NEXT: s_cbranch_execnz .LBB7_4 4349; GFX1064-NEXT: .LBB7_5: 4350; GFX1064-NEXT: s_endpgm 4351; 4352; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4353; GFX1032: ; %bb.0: 4354; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4355; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4356; GFX1032-NEXT: s_mov_b32 s50, -1 4357; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 4358; GFX1032-NEXT: s_add_u32 s48, s48, s11 4359; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 4360; GFX1032-NEXT: s_addc_u32 s49, s49, 0 4361; 
GFX1032-NEXT: s_mov_b32 s43, s8 4362; GFX1032-NEXT: s_add_u32 s8, s34, 44 4363; GFX1032-NEXT: s_mov_b32 s42, s9 4364; GFX1032-NEXT: s_addc_u32 s9, s35, 0 4365; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] 4366; GFX1032-NEXT: s_getpc_b64 s[0:1] 4367; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4368; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4369; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4370; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4371; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4372; GFX1032-NEXT: s_mov_b32 s33, s10 4373; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] 4374; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] 4375; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] 4376; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 4377; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 4378; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 4379; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] 4380; GFX1032-NEXT: s_mov_b32 s12, s43 4381; GFX1032-NEXT: v_mov_b32_e32 v31, v40 4382; GFX1032-NEXT: s_mov_b32 s13, s42 4383; GFX1032-NEXT: s_mov_b32 s14, s33 4384; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 4385; GFX1032-NEXT: s_movk_i32 s32, 0x400 4386; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 4387; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 4388; GFX1032-NEXT: v_mov_b32_e32 v2, 0 4389; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4390; GFX1032-NEXT: s_mov_b32 s0, exec_lo 4391; GFX1032-NEXT: .LBB7_1: ; %ComputeLoop 4392; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 4393; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 4394; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4395; GFX1032-NEXT: v_readlane_b32 s3, v1, s1 4396; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 4397; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 4398; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 4399; GFX1032-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4400; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 4401; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4402; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1 4403; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 4404; GFX1032-NEXT: 
v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4405; GFX1032-NEXT: s_mov_b32 s46, 0 4406; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 4407; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 4408; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 4409; GFX1032-NEXT: s_cbranch_execz .LBB7_5 4410; GFX1032-NEXT: ; %bb.3: 4411; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 4412; GFX1032-NEXT: v_mov_b32_e32 v0, 0 4413; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4414; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 4415; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 4416; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start 4417; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 4418; GFX1032-NEXT: s_waitcnt vmcnt(0) 4419; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 4420; GFX1032-NEXT: s_add_u32 s8, s34, 44 4421; GFX1032-NEXT: s_addc_u32 s9, s35, 0 4422; GFX1032-NEXT: s_getpc_b64 s[0:1] 4423; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4424; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4425; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 4426; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 4427; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4428; GFX1032-NEXT: v_mov_b32_e32 v31, v40 4429; GFX1032-NEXT: v_mov_b32_e32 v2, s44 4430; GFX1032-NEXT: v_mov_b32_e32 v3, s45 4431; GFX1032-NEXT: v_mov_b32_e32 v4, 0 4432; GFX1032-NEXT: v_mov_b32_e32 v5, 8 4433; GFX1032-NEXT: v_mov_b32_e32 v6, 0 4434; GFX1032-NEXT: v_mov_b32_e32 v7, 0 4435; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 4436; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 4437; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] 4438; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] 4439; GFX1032-NEXT: s_mov_b32 s12, s43 4440; GFX1032-NEXT: s_mov_b32 s13, s42 4441; GFX1032-NEXT: s_mov_b32 s14, s33 4442; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 4443; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4444; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 4445; 
GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 4446; GFX1032-NEXT: v_mov_b32_e32 v0, 8 4447; GFX1032-NEXT: v_mov_b32_e32 v1, 0 4448; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 4449; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 4450; GFX1032-NEXT: s_clause 0x1 4451; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 4452; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 4453; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 4454; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 4455; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 4456; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 4457; GFX1032-NEXT: s_cbranch_execnz .LBB7_4 4458; GFX1032-NEXT: .LBB7_5: 4459; GFX1032-NEXT: s_endpgm 4460; 4461; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4462; GFX1164: ; %bb.0: 4463; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 4464; GFX1164-NEXT: s_mov_b32 s43, s8 4465; GFX1164-NEXT: s_add_u32 s8, s34, 44 4466; GFX1164-NEXT: s_mov_b32 s42, s9 4467; GFX1164-NEXT: s_addc_u32 s9, s35, 0 4468; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] 4469; GFX1164-NEXT: s_getpc_b64 s[0:1] 4470; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4471; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4472; GFX1164-NEXT: v_mov_b32_e32 v31, v0 4473; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4474; GFX1164-NEXT: s_mov_b32 s33, s10 4475; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] 4476; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 4477; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 4478; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] 4479; GFX1164-NEXT: s_mov_b32 s12, s43 4480; GFX1164-NEXT: s_mov_b32 s13, s42 4481; GFX1164-NEXT: s_mov_b32 s14, s33 4482; GFX1164-NEXT: s_mov_b32 s32, 32 4483; GFX1164-NEXT: v_mov_b32_e32 v40, v0 4484; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] 4485; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 4486; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 4487; GFX1164-NEXT: v_mov_b32_e32 v2, 0 4488; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4489; 
GFX1164-NEXT: s_mov_b64 s[0:1], exec 4490; GFX1164-NEXT: .LBB7_1: ; %ComputeLoop 4491; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 4492; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4493; GFX1164-NEXT: s_ctz_i32_b64 s4, s[0:1] 4494; GFX1164-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4495; GFX1164-NEXT: v_readlane_b32 s3, v1, s4 4496; GFX1164-NEXT: v_readlane_b32 s2, v0, s4 4497; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 4498; GFX1164-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4499; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4 4500; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 4501; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4502; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 4503; GFX1164-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4504; GFX1164-NEXT: s_cbranch_scc1 .LBB7_1 4505; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 4506; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4507; GFX1164-NEXT: s_mov_b64 s[0:1], exec 4508; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4509; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 4510; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 4511; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 4512; GFX1164-NEXT: s_cbranch_execz .LBB7_5 4513; GFX1164-NEXT: ; %bb.3: 4514; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 4515; GFX1164-NEXT: v_mov_b32_e32 v0, 0 4516; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4517; GFX1164-NEXT: s_mov_b64 s[46:47], 0 4518; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 4519; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] 4520; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 4521; GFX1164-NEXT: .p2align 6 4522; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start 4523; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 4524; GFX1164-NEXT: s_waitcnt vmcnt(0) 4525; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 4526; GFX1164-NEXT: s_add_u32 s8, s34, 44 4527; GFX1164-NEXT: 
s_addc_u32 s9, s35, 0 4528; GFX1164-NEXT: s_getpc_b64 s[0:1] 4529; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4530; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4531; GFX1164-NEXT: v_mov_b32_e32 v31, v40 4532; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4533; GFX1164-NEXT: v_mov_b32_e32 v2, s44 4534; GFX1164-NEXT: v_mov_b32_e32 v3, s45 4535; GFX1164-NEXT: v_mov_b32_e32 v6, 0 4536; GFX1164-NEXT: v_mov_b32_e32 v7, 0 4537; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 4538; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] 4539; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] 4540; GFX1164-NEXT: s_mov_b32 s12, s43 4541; GFX1164-NEXT: s_mov_b32 s13, s42 4542; GFX1164-NEXT: s_mov_b32 s14, s33 4543; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4544; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off 4545; GFX1164-NEXT: v_mov_b32_e32 v4, 0 4546; GFX1164-NEXT: v_mov_b32_e32 v5, 8 4547; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8 4548; GFX1164-NEXT: v_mov_b32_e32 v0, 8 4549; GFX1164-NEXT: v_mov_b32_e32 v1, 0 4550; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 4551; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 4552; GFX1164-NEXT: scratch_load_b64 v[4:5], off, off 4553; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 4554; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 4555; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 4556; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 4557; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] 4558; GFX1164-NEXT: s_cbranch_execnz .LBB7_4 4559; GFX1164-NEXT: .LBB7_5: 4560; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 4561; GFX1164-NEXT: s_endpgm 4562; 4563; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4564; GFX1132: ; %bb.0: 4565; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] 4566; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] 4567; GFX1132-NEXT: s_add_u32 s8, s34, 44 4568; GFX1132-NEXT: s_addc_u32 s9, s35, 0 4569; GFX1132-NEXT: 
s_getpc_b64 s[0:1] 4570; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4571; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4572; GFX1132-NEXT: v_mov_b32_e32 v31, v0 4573; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4574; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] 4575; GFX1132-NEXT: s_mov_b32 s42, s14 4576; GFX1132-NEXT: s_mov_b32 s43, s13 4577; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 4578; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 4579; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] 4580; GFX1132-NEXT: s_mov_b32 s12, s13 4581; GFX1132-NEXT: s_mov_b32 s13, s14 4582; GFX1132-NEXT: s_mov_b32 s14, s15 4583; GFX1132-NEXT: s_mov_b32 s32, 32 4584; GFX1132-NEXT: s_mov_b32 s33, s15 4585; GFX1132-NEXT: v_mov_b32_e32 v40, v0 4586; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] 4587; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 4588; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] 4589; GFX1132-NEXT: v_mov_b32_e32 v2, 0 4590; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000 4591; GFX1132-NEXT: s_mov_b32 s0, exec_lo 4592; GFX1132-NEXT: .LBB7_1: ; %ComputeLoop 4593; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 4594; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4595; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 4596; GFX1132-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 4597; GFX1132-NEXT: v_readlane_b32 s3, v1, s1 4598; GFX1132-NEXT: v_readlane_b32 s2, v0, s1 4599; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 4600; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4601; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 4602; GFX1132-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 4603; GFX1132-NEXT: s_cmp_lg_u32 s0, 0 4604; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 4605; GFX1132-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 4606; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1 4607; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 4608; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4609; GFX1132-NEXT: s_mov_b32 s46, 0 4610; GFX1132-NEXT: s_mov_b32 s0, 
exec_lo 4611; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 4612; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 4613; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 4614; GFX1132-NEXT: s_cbranch_execz .LBB7_5 4615; GFX1132-NEXT: ; %bb.3: 4616; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 4617; GFX1132-NEXT: v_mov_b32_e32 v0, 0 4618; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) 4619; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 4620; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 4621; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] 4622; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 4623; GFX1132-NEXT: .p2align 6 4624; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start 4625; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 4626; GFX1132-NEXT: s_waitcnt vmcnt(0) 4627; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 4628; GFX1132-NEXT: s_add_u32 s8, s34, 44 4629; GFX1132-NEXT: s_addc_u32 s9, s35, 0 4630; GFX1132-NEXT: s_getpc_b64 s[0:1] 4631; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4632; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4633; GFX1132-NEXT: v_mov_b32_e32 v31, v40 4634; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4635; GFX1132-NEXT: v_mov_b32_e32 v3, s45 4636; GFX1132-NEXT: v_mov_b32_e32 v7, 0 4637; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 4638; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] 4639; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] 4640; GFX1132-NEXT: s_mov_b32 s12, s43 4641; GFX1132-NEXT: s_mov_b32 s13, s42 4642; GFX1132-NEXT: s_mov_b32 s14, s33 4643; GFX1132-NEXT: v_mov_b32_e32 v6, 0 4644; GFX1132-NEXT: v_mov_b32_e32 v2, s44 4645; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4646; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off 4647; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 4648; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8 4649; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0 4650; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 4651; GFX1132-NEXT: s_swappc_b64 s[30:31], 
s[0:1] 4652; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off 4653; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 4654; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 4655; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 4656; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 4657; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 4658; GFX1132-NEXT: s_cbranch_execnz .LBB7_4 4659; GFX1132-NEXT: .LBB7_5: 4660; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 4661; GFX1132-NEXT: s_endpgm 4662; 4663; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4664; GFX7LESS-DPP: ; %bb.0: 4665; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 4666; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4667; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4668; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 4669; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 4670; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 4671; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 4672; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 4673; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 4674; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 4675; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 4676; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 4677; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 4678; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 4679; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 4680; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 4681; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 4682; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 4683; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 4684; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 4685; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4686; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4687; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4688; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4689; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4690; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 
4691; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 4692; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 4693; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 4694; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 4695; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 4696; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 4697; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 4698; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 4699; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 4700; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 4701; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 4702; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 4703; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 4704; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 4705; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] 4706; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start 4707; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 4708; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 4709; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 4710; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 4711; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 4712; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 4713; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 4714; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 4715; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 4716; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4717; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4718; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 4719; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 4720; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4721; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 4722; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 4723; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0 4724; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0 4725; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 4726; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 4727; 
GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 4728; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 4729; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 4730; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 4731; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 4732; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 4733; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 4734; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 4735; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 4736; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 4737; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 4738; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 4739; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 4740; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 4741; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 4742; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 4743; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 4744; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 4745; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 4746; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 4747; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1 4748; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 4749; GFX7LESS-DPP-NEXT: s_endpgm 4750; 4751; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4752; GFX9-DPP: ; %bb.0: 4753; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 4754; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 4755; GFX9-DPP-NEXT: s_mov_b32 s54, -1 4756; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 4757; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 4758; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 4759; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 4760; GFX9-DPP-NEXT: s_mov_b32 s43, s8 4761; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 4762; GFX9-DPP-NEXT: s_mov_b32 s42, s9 4763; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 4764; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 4765; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 4766; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4767; GFX9-DPP-NEXT: s_addc_u32 s1, s1, 
div.double.value@gotpcrel32@hi+12 4768; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4769; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4770; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4771; GFX9-DPP-NEXT: s_mov_b32 s33, s10 4772; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 4773; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 4774; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 4775; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 4776; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] 4777; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 4778; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 4779; GFX9-DPP-NEXT: s_mov_b32 s12, s43 4780; GFX9-DPP-NEXT: s_mov_b32 s13, s42 4781; GFX9-DPP-NEXT: s_mov_b32 s14, s33 4782; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 4783; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] 4784; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 4785; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 4786; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 4787; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 4788; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 4789; GFX9-DPP-NEXT: v_cndmask_b32_e64 v11, v9, v1, s[0:1] 4790; GFX9-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 4791; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 4792; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 4793; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0 4794; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf 4795; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf 4796; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4797; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 4798; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 4799; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 4800; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 4801; GFX9-DPP-NEXT: s_nop 0 4802; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf 4803; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf 4804; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 4805; GFX9-DPP-NEXT: v_max_f64 v[10:11], 
v[10:11], v[12:13] 4806; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 4807; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 4808; GFX9-DPP-NEXT: s_nop 0 4809; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf 4810; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf 4811; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 4812; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 4813; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 4814; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 4815; GFX9-DPP-NEXT: s_nop 0 4816; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf 4817; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf 4818; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 4819; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 4820; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 4821; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 4822; GFX9-DPP-NEXT: s_nop 0 4823; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf 4824; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf 4825; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 4826; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 4827; GFX9-DPP-NEXT: s_nop 1 4828; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf 4829; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf 4830; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 4831; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 4832; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 4833; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 4834; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 4835; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 4836; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 4837; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 4838; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 4839; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 4840; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 4841; GFX9-DPP-NEXT: 
s_and_saveexec_b64 s[0:1], vcc 4842; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 4843; GFX9-DPP-NEXT: ; %bb.1: 4844; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 4845; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 4846; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 4847; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] 4848; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start 4849; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 4850; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] 4851; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 4852; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] 4853; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 4854; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 4855; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 4856; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4857; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4858; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4859; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] 4860; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 4861; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 4862; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 4863; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 4864; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 4865; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 4866; GFX9-DPP-NEXT: s_mov_b32 s12, s43 4867; GFX9-DPP-NEXT: s_mov_b32 s13, s42 4868; GFX9-DPP-NEXT: s_mov_b32 s14, s33 4869; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 4870; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 4871; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 4872; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] 4873; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 4874; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 4875; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 4876; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 4877; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 4878; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 4879; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 4880; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 4881; 
GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 4882; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 4883; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 4884; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 4885; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 4886; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 4887; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] 4888; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] 4889; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2 4890; GFX9-DPP-NEXT: .LBB7_3: 4891; GFX9-DPP-NEXT: s_endpgm 4892; 4893; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 4894; GFX1064-DPP: ; %bb.0: 4895; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 4896; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 4897; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 4898; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 4899; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 4900; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 4901; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 4902; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 4903; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 4904; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 4905; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 4906; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 4907; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 4908; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 4909; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 4910; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 4911; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4912; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 4913; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 4914; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 4915; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 4916; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 4917; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 4918; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 4919; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 4920; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 
4921; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 4922; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 4923; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 4924; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 4925; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 4926; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 4927; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 4928; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 4929; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 4930; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 4931; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0 4932; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1] 4933; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 4934; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 4935; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 4936; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4937; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 4938; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 4939; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 4940; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 4941; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 4942; GFX1064-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 4943; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4944; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 4945; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 4946; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 4947; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 4948; GFX1064-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 4949; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4950; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 4951; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 4952; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 4953; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 4954; GFX1064-DPP-NEXT: 
v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 4955; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4956; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 4957; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 4958; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 4959; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 4960; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 4961; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0 4962; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32 4963; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32 4964; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0 4965; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5] 4966; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3] 4967; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 4968; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 4969; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 4970; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8 4971; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9 4972; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 4973; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 4974; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 4975; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 4976; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 4977; GFX1064-DPP-NEXT: ; %bb.1: 4978; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 4979; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 4980; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 4981; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] 4982; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start 4983; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 4984; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] 4985; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) 4986; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] 4987; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 4988; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 4989; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 4990; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 4991; 
GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 4992; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 4993; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 4994; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 4995; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 4996; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 4997; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 4998; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 4999; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 5000; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 5001; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5002; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 5003; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5004; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 5005; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 5006; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 5007; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 5008; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 5009; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 5010; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 5011; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 5012; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 5013; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 5014; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 5015; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 5016; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 5017; GFX1064-DPP-NEXT: s_clause 0x1 5018; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 5019; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 5020; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 5021; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 5022; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 5023; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 5024; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2 5025; GFX1064-DPP-NEXT: .LBB7_3: 5026; GFX1064-DPP-NEXT: s_endpgm 5027; 5028; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 5029; GFX1032-DPP: ; %bb.0: 
5030; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 5031; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 5032; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 5033; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 5034; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 5035; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 5036; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 5037; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 5038; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 5039; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 5040; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 5041; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 5042; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 5043; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 5044; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 5045; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 5046; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 5047; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 5048; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 5049; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 5050; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 5051; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 5052; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 5053; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 5054; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5055; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5056; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 5057; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 5058; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 5059; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 5060; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 5061; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 5062; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 5063; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 5064; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 5065; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 5066; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0 5067; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0 5068; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 5069; 
GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 5070; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 5071; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5072; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 5073; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 5074; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5075; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 5076; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 5077; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 5078; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5079; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5080; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5081; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 5082; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 5083; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 5084; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5085; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5086; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5087; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 5088; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 5089; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 5090; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5091; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5092; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 5093; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 5094; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5095; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5096; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 5097; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 5098; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 5099; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 5100; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 5101; 
GFX1032-DPP-NEXT: s_mov_b32 s46, 0 5102; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 5103; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 5104; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 5105; GFX1032-DPP-NEXT: ; %bb.1: 5106; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 5107; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 5108; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 5109; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] 5110; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start 5111; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5112; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) 5113; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 5114; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 5115; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 5116; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 5117; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 5118; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 5119; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 5120; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 5121; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 5122; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 5123; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 5124; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 5125; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 5126; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 5127; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 5128; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 5129; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 5130; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5131; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 5132; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5133; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 5134; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 5135; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 5136; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 5137; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 5138; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 
offset:12 5139; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 5140; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 5141; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 5142; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 5143; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 5144; GFX1032-DPP-NEXT: s_clause 0x1 5145; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 5146; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 5147; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 5148; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 5149; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 5150; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 5151; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2 5152; GFX1032-DPP-NEXT: .LBB7_3: 5153; GFX1032-DPP-NEXT: s_endpgm 5154; 5155; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 5156; GFX1164-DPP: ; %bb.0: 5157; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 5158; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 5159; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 5160; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 5161; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 5162; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 5163; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 5164; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 5165; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 5166; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 5167; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5168; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 5169; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 5170; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5171; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 5172; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5173; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 5174; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 5175; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 5176; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 5177; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 5178; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 5179; 
GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 5180; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 5181; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 5182; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 5183; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0 5184; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1] 5185; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 5186; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5187; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 5188; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 5189; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5190; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5191; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 5192; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 5193; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5194; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 5195; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5196; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 5197; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 5198; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5199; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5200; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5201; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5202; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 5203; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5204; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 5205; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 5206; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5207; GFX1164-DPP-NEXT: v_max_f64 v[10:11], 
v[10:11], v[10:11] 5208; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5209; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 5210; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 5211; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5212; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 5213; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 5214; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5215; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5216; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5217; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5218; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 5219; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 5220; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5221; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5222; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5223; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5224; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9 5225; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8 5226; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5227; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5228; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5229; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 5230; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5231; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5232; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8 5233; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5234; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9 5235; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 5236; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 5237; 
GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 5238; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 5239; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 5240; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3 5241; GFX1164-DPP-NEXT: ; %bb.1: 5242; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 5243; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 5244; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 5245; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 5246; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] 5247; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 5248; GFX1164-DPP-NEXT: .p2align 6 5249; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start 5250; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5251; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 5252; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 5253; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 5254; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 5255; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 5256; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 5257; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 5258; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 5259; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5260; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 5261; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 5262; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 5263; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 5264; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5265; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 5266; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5267; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 5268; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 5269; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 5270; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 5271; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off 5272; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 5273; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 5274; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 5275; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 5276; 
GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 5277; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 5278; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 5279; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off 5280; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 5281; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 5282; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 5283; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 5284; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] 5285; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2 5286; GFX1164-DPP-NEXT: .LBB7_3: 5287; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 5288; GFX1164-DPP-NEXT: s_endpgm 5289; 5290; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: 5291; GFX1132-DPP: ; %bb.0: 5292; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 5293; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 5294; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 5295; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 5296; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 5297; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 5298; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 5299; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 5300; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5301; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 5302; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 5303; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 5304; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5305; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 5306; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5307; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 5308; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 5309; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 5310; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 5311; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 5312; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 5313; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 5314; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 5315; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 
5316; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 5317; GFX1132-DPP-NEXT: v_dual_mov_b32 v9, 0x7ff80000 :: v_dual_mov_b32 v8, 0 5318; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0 5319; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 5320; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5321; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 5322; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 5323; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5324; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5325; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 5326; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 5327; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 5328; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5329; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 5330; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 5331; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5332; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5333; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5334; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 5335; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5336; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 5337; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 5338; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5339; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5340; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5341; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 5342; 
GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5343; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 5344; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 5345; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5346; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5347; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5348; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5349; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 5350; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 5351; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5352; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 5353; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 5354; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 5355; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5356; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 5357; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 5358; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 5359; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 5360; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 5361; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 5362; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 5363; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3 5364; GFX1132-DPP-NEXT: ; %bb.1: 5365; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 5366; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 5367; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 5368; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] 5369; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 5370; GFX1132-DPP-NEXT: .p2align 6 5371; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start 5372; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5373; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 5374; GFX1132-DPP-NEXT: v_max_f64 
v[3:4], v[1:2], v[1:2] 5375; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 5376; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 5377; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 5378; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 5379; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 5380; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 5381; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5382; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 5383; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 5384; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 5385; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 5386; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 5387; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 5388; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 5389; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 5390; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) 5391; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 5392; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off 5393; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 5394; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 5395; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 5396; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 5397; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 5398; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off 5399; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 5400; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 5401; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 5402; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 5403; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 5404; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2 5405; GFX1132-DPP-NEXT: .LBB7_3: 5406; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 5407; GFX1132-DPP-NEXT: s_endpgm 5408 %divValue = call double @div.double.value() 5409 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue 
syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1 5410 ret void 5411} 5412
; Test case: atomicrmw fmax on the kernel's ptr addrspace(1) argument with the
; constant operand double 4.0, syncscope("one-as"), monotonic ordering, and
; !amdgpu.no.fine.grained.memory metadata; the atomic's result is unused.
; In the expectations below every configuration first compares the v_mbcnt
; result against 0 and branches around the atomic when exec becomes empty.
; The gfx1010 configurations (wave64 and wave32, both optimizer strategies)
; expect a single global_atomic_fmax_x2; all other configurations expect a
; compare-and-swap retry loop (%atomicrmw.start).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
5413define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 { 5414; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5415; GFX7LESS: ; %bb.0: 5416; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 5417; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 5418; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5419; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 5420; GFX7LESS-NEXT: s_cbranch_execz .LBB8_3 5421; GFX7LESS-NEXT: ; %bb.1: 5422; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 5423; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 5424; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 5425; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 5426; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 5427; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 5428; GFX7LESS-NEXT: v_mov_b32_e32 v2, s6 5429; GFX7LESS-NEXT: v_mov_b32_e32 v3, s7 5430; GFX7LESS-NEXT: s_mov_b32 s2, -1 5431; GFX7LESS-NEXT: .LBB8_2: ; %atomicrmw.start 5432; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 5433; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5434; GFX7LESS-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5435; GFX7LESS-NEXT: s_waitcnt expcnt(0) 5436; GFX7LESS-NEXT: v_mov_b32_e32 v7, v3 5437; GFX7LESS-NEXT: v_mov_b32_e32 v6, v2 5438; GFX7LESS-NEXT: v_mov_b32_e32 v5, v1 5439; GFX7LESS-NEXT: v_mov_b32_e32 v4, v0 5440; GFX7LESS-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[0:3], 0 glc 5441; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 5442; GFX7LESS-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] 5443; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5444; GFX7LESS-NEXT: v_mov_b32_e32 v2, v4 5445; GFX7LESS-NEXT: v_mov_b32_e32 v3, v5 5446; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 5447; GFX7LESS-NEXT: s_cbranch_execnz .LBB8_2 5448; GFX7LESS-NEXT: .LBB8_3: 5449; GFX7LESS-NEXT: s_endpgm 5450; 5451; GFX9-LABEL:
global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5452; GFX9: ; %bb.0: 5453; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5454; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5455; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5456; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 5457; GFX9-NEXT: s_cbranch_execz .LBB8_3 5458; GFX9-NEXT: ; %bb.1: 5459; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5460; GFX9-NEXT: s_mov_b64 s[2:3], 0 5461; GFX9-NEXT: v_mov_b32_e32 v4, 0 5462; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5463; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5464; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5465; GFX9-NEXT: v_mov_b32_e32 v2, s4 5466; GFX9-NEXT: v_mov_b32_e32 v3, s5 5467; GFX9-NEXT: .LBB8_2: ; %atomicrmw.start 5468; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5469; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5470; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5471; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc 5472; GFX9-NEXT: s_waitcnt vmcnt(0) 5473; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5474; GFX9-NEXT: v_mov_b32_e32 v3, v1 5475; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 5476; GFX9-NEXT: v_mov_b32_e32 v2, v0 5477; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 5478; GFX9-NEXT: s_cbranch_execnz .LBB8_2 5479; GFX9-NEXT: .LBB8_3: 5480; GFX9-NEXT: s_endpgm 5481; 5482; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5483; GFX1064: ; %bb.0: 5484; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5485; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5486; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5487; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 5488; GFX1064-NEXT: s_cbranch_execz .LBB8_2 5489; GFX1064-NEXT: ; %bb.1: 5490; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5491; GFX1064-NEXT: v_mov_b32_e32 v0, 0 5492; GFX1064-NEXT: v_mov_b32_e32 v1, 0x40100000 5493; GFX1064-NEXT: v_mov_b32_e32 v2, 0 5494; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 5495; GFX1064-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] 5496;
GFX1064-NEXT: .LBB8_2: 5497; GFX1064-NEXT: s_endpgm 5498; 5499; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5500; GFX1032: ; %bb.0: 5501; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5502; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 5503; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 5504; GFX1032-NEXT: s_cbranch_execz .LBB8_2 5505; GFX1032-NEXT: ; %bb.1: 5506; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5507; GFX1032-NEXT: v_mov_b32_e32 v0, 0 5508; GFX1032-NEXT: v_mov_b32_e32 v1, 0x40100000 5509; GFX1032-NEXT: v_mov_b32_e32 v2, 0 5510; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 5511; GFX1032-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] 5512; GFX1032-NEXT: .LBB8_2: 5513; GFX1032-NEXT: s_endpgm 5514; 5515; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5516; GFX1164: ; %bb.0: 5517; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5518; GFX1164-NEXT: s_mov_b64 s[0:1], exec 5519; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5520; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5521; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 5522; GFX1164-NEXT: s_cbranch_execz .LBB8_3 5523; GFX1164-NEXT: ; %bb.1: 5524; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 5525; GFX1164-NEXT: v_mov_b32_e32 v4, 0 5526; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 5527; GFX1164-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 5528; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 5529; GFX1164-NEXT: v_mov_b32_e32 v2, s2 5530; GFX1164-NEXT: v_mov_b32_e32 v3, s3 5531; GFX1164-NEXT: s_mov_b64 s[2:3], 0 5532; GFX1164-NEXT: .LBB8_2: ; %atomicrmw.start 5533; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 5534; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5535; GFX1164-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5536; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5537; GFX1164-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc 5538; GFX1164-NEXT: s_waitcnt vmcnt(0) 5539;
GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5540; GFX1164-NEXT: v_mov_b32_e32 v3, v1 5541; GFX1164-NEXT: v_mov_b32_e32 v2, v0 5542; GFX1164-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 5543; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5544; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[2:3] 5545; GFX1164-NEXT: s_cbranch_execnz .LBB8_2 5546; GFX1164-NEXT: .LBB8_3: 5547; GFX1164-NEXT: s_endpgm 5548; 5549; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5550; GFX1132: ; %bb.0: 5551; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5552; GFX1132-NEXT: s_mov_b32 s2, 0 5553; GFX1132-NEXT: s_mov_b32 s0, exec_lo 5554; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 5555; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 5556; GFX1132-NEXT: s_cbranch_execz .LBB8_3 5557; GFX1132-NEXT: ; %bb.1: 5558; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 5559; GFX1132-NEXT: v_mov_b32_e32 v4, 0 5560; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 5561; GFX1132-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 5562; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 5563; GFX1132-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 5564; GFX1132-NEXT: .LBB8_2: ; %atomicrmw.start 5565; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 5566; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5567; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5568; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5569; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc 5570; GFX1132-NEXT: s_waitcnt vmcnt(0) 5571; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] 5572; GFX1132-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 5573; GFX1132-NEXT: s_or_b32 s2, vcc_lo, s2 5574; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5575; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 5576; GFX1132-NEXT: s_cbranch_execnz .LBB8_2 5577; GFX1132-NEXT: .LBB8_3: 5578; GFX1132-NEXT: s_endpgm 5579; 5580; GFX7LESS-DPP-LABEL:
global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5581; GFX7LESS-DPP: ; %bb.0: 5582; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 5583; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 5584; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5585; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 5586; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB8_3 5587; GFX7LESS-DPP-NEXT: ; %bb.1: 5588; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 5589; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 5590; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 5591; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 5592; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 5593; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 5594; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s6 5595; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s7 5596; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 5597; GFX7LESS-DPP-NEXT: .LBB8_2: ; %atomicrmw.start 5598; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5599; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5600; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5601; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 5602; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 5603; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 5604; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, v1 5605; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v0 5606; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[0:3], 0 glc 5607; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 5608; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] 5609; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5610; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v4 5611; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v5 5612; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 5613; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB8_2 5614; GFX7LESS-DPP-NEXT: .LBB8_3: 5615; GFX7LESS-DPP-NEXT: s_endpgm 5616; 5617; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5618; GFX9-DPP: ; %bb.0: 5619; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5620;
GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5621; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5622; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 5623; GFX9-DPP-NEXT: s_cbranch_execz .LBB8_3 5624; GFX9-DPP-NEXT: ; %bb.1: 5625; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5626; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 5627; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 5628; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 5629; GFX9-DPP-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5630; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 5631; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 5632; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s5 5633; GFX9-DPP-NEXT: .LBB8_2: ; %atomicrmw.start 5634; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5635; GFX9-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5636; GFX9-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5637; GFX9-DPP-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc 5638; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 5639; GFX9-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5640; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v1 5641; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 5642; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v0 5643; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 5644; GFX9-DPP-NEXT: s_cbranch_execnz .LBB8_2 5645; GFX9-DPP-NEXT: .LBB8_3: 5646; GFX9-DPP-NEXT: s_endpgm 5647; 5648; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5649; GFX1064-DPP: ; %bb.0: 5650; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5651; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5652; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5653; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 5654; GFX1064-DPP-NEXT: s_cbranch_execz .LBB8_2 5655; GFX1064-DPP-NEXT: ; %bb.1: 5656; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5657; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 5658; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0x40100000 5659; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, 0 5660; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 5661; GFX1064-DPP-NEXT: global_atomic_fmax_x2 v2,
v[0:1], s[0:1] 5662; GFX1064-DPP-NEXT: .LBB8_2: 5663; GFX1064-DPP-NEXT: s_endpgm 5664; 5665; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5666; GFX1032-DPP: ; %bb.0: 5667; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5668; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 5669; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 5670; GFX1032-DPP-NEXT: s_cbranch_execz .LBB8_2 5671; GFX1032-DPP-NEXT: ; %bb.1: 5672; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 5673; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 5674; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0x40100000 5675; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0 5676; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 5677; GFX1032-DPP-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] 5678; GFX1032-DPP-NEXT: .LBB8_2: 5679; GFX1032-DPP-NEXT: s_endpgm 5680; 5681; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5682; GFX1164-DPP: ; %bb.0: 5683; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5684; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 5685; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5686; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5687; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 5688; GFX1164-DPP-NEXT: s_cbranch_execz .LBB8_3 5689; GFX1164-DPP-NEXT: ; %bb.1: 5690; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 5691; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 5692; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 5693; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 5694; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 5695; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s2 5696; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s3 5697; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], 0 5698; GFX1164-DPP-NEXT: .LBB8_2: ; %atomicrmw.start 5699; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5700; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5701; GFX1164-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5702;
GFX1164-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5703; GFX1164-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc 5704; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 5705; GFX1164-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5706; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v1 5707; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, v0 5708; GFX1164-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 5709; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5710; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[2:3] 5711; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB8_2 5712; GFX1164-DPP-NEXT: .LBB8_3: 5713; GFX1164-DPP-NEXT: s_endpgm 5714; 5715; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_one_as_scope_unsafe: 5716; GFX1132-DPP: ; %bb.0: 5717; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5718; GFX1132-DPP-NEXT: s_mov_b32 s2, 0 5719; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 5720; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 5721; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 5722; GFX1132-DPP-NEXT: s_cbranch_execz .LBB8_3 5723; GFX1132-DPP-NEXT: ; %bb.1: 5724; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 5725; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, 0 5726; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 5727; GFX1132-DPP-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 5728; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 5729; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 5730; GFX1132-DPP-NEXT: .LBB8_2: ; %atomicrmw.start 5731; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 5732; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5733; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5734; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 4.0 5735; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] glc 5736; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 5737; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] 5738; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 5739; GFX1132-DPP-NEXT: s_or_b32 s2,
vcc_lo, s2 5740; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5741; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 5742; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2 5743; GFX1132-DPP-NEXT: .LBB8_3: 5744; GFX1132-DPP-NEXT: s_endpgm 5745 %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1 5746 ret void 5747} 5748 5749define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 { 5750; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 5751; GFX7LESS: ; %bb.0: 5752; GFX7LESS-NEXT: s_mov_b32 s32, 0 5753; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5754; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5755; GFX7LESS-NEXT: s_mov_b32 s38, -1 5756; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 5757; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 5758; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 5759; GFX7LESS-NEXT: s_mov_b32 s14, s10 5760; GFX7LESS-NEXT: s_mov_b32 s13, s9 5761; GFX7LESS-NEXT: s_mov_b32 s12, s8 5762; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[6:7] 5763; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5] 5764; GFX7LESS-NEXT: s_add_u32 s8, s34, 44 5765; GFX7LESS-NEXT: s_addc_u32 s9, s35, 0 5766; GFX7LESS-NEXT: s_getpc_b64 s[4:5] 5767; GFX7LESS-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 5768; GFX7LESS-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 5769; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 5770; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 5771; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 5772; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 5773; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 5774; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] 5775; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 5776; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] 5777; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] 5778; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 5779; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 5780;
GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 5781; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0 5782; GFX7LESS-NEXT: v_mov_b32_e32 v5, 0x7ff80000 5783; GFX7LESS-NEXT: .LBB9_1: ; %ComputeLoop 5784; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 5785; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1] 5786; GFX7LESS-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 5787; GFX7LESS-NEXT: v_readlane_b32 s3, v1, s4 5788; GFX7LESS-NEXT: v_readlane_b32 s2, v0, s4 5789; GFX7LESS-NEXT: s_lshl_b64 s[4:5], 1, s4 5790; GFX7LESS-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 5791; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 5792; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 5793; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 5794; GFX7LESS-NEXT: v_max_f64 v[4:5], v[2:3], v[4:5] 5795; GFX7LESS-NEXT: s_cbranch_vccnz .LBB9_1 5796; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 5797; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 5798; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 5799; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5800; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 5801; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 5802; GFX7LESS-NEXT: s_cbranch_execz .LBB9_5 5803; GFX7LESS-NEXT: ; %bb.3: 5804; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x9 5805; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 5806; GFX7LESS-NEXT: s_mov_b32 s2, -1 5807; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 5808; GFX7LESS-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 5809; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 5810; GFX7LESS-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 5811; GFX7LESS-NEXT: .LBB9_4: ; %atomicrmw.start 5812; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 5813; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 5814; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5815; GFX7LESS-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 5816; GFX7LESS-NEXT: s_waitcnt expcnt(0) 5817; GFX7LESS-NEXT: v_mov_b32_e32 v9, v3 5818; GFX7LESS-NEXT: v_mov_b32_e32 v8, v2 5819; GFX7LESS-NEXT: v_mov_b32_e32 v7, v1 5820; GFX7LESS-NEXT: v_mov_b32_e32 v6, v0 5821; GFX7LESS-NEXT: 
buffer_atomic_cmpswap_x2 v[6:9], off, s[0:3], 0 glc 5822; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 5823; GFX7LESS-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] 5824; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5825; GFX7LESS-NEXT: v_mov_b32_e32 v2, v6 5826; GFX7LESS-NEXT: v_mov_b32_e32 v3, v7 5827; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 5828; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_4 5829; GFX7LESS-NEXT: .LBB9_5: 5830; GFX7LESS-NEXT: s_endpgm 5831; 5832; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 5833; GFX9: ; %bb.0: 5834; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5835; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5836; GFX9-NEXT: s_mov_b32 s38, -1 5837; GFX9-NEXT: s_mov_b32 s39, 0xe00000 5838; GFX9-NEXT: s_add_u32 s36, s36, s11 5839; GFX9-NEXT: s_addc_u32 s37, s37, 0 5840; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] 5841; GFX9-NEXT: s_mov_b32 s12, s8 5842; GFX9-NEXT: s_add_u32 s8, s34, 44 5843; GFX9-NEXT: s_mov_b32 s13, s9 5844; GFX9-NEXT: s_addc_u32 s9, s35, 0 5845; GFX9-NEXT: s_getpc_b64 s[4:5] 5846; GFX9-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 5847; GFX9-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 5848; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 5849; GFX9-NEXT: s_mov_b32 s14, s10 5850; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] 5851; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 5852; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 5853; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] 5854; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 5855; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 5856; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 5857; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 5858; GFX9-NEXT: s_mov_b32 s32, 0 5859; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5860; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 5861; GFX9-NEXT: v_mov_b32_e32 v4, 0 5862; GFX9-NEXT: s_mov_b64 s[0:1], exec 5863; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000 5864; GFX9-NEXT: .LBB9_1: ; %ComputeLoop 5865; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5866; GFX9-NEXT: s_ff1_i32_b64 s4, 
s[0:1] 5867; GFX9-NEXT: v_readlane_b32 s3, v1, s4 5868; GFX9-NEXT: v_readlane_b32 s2, v0, s4 5869; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 5870; GFX9-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 5871; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4 5872; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 5873; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 5874; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[4:5] 5875; GFX9-NEXT: s_cbranch_scc1 .LBB9_1 5876; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 5877; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5878; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5879; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 5880; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 5881; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 5882; GFX9-NEXT: s_cbranch_execz .LBB9_5 5883; GFX9-NEXT: ; %bb.3: 5884; GFX9-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 5885; GFX9-NEXT: v_mov_b32_e32 v6, 0 5886; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 5887; GFX9-NEXT: s_mov_b64 s[2:3], 0 5888; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5889; GFX9-NEXT: global_load_dwordx2 v[2:3], v6, s[0:1] 5890; GFX9-NEXT: .LBB9_4: ; %atomicrmw.start 5891; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5892; GFX9-NEXT: s_waitcnt vmcnt(0) 5893; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 5894; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 5895; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc 5896; GFX9-NEXT: s_waitcnt vmcnt(0) 5897; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5898; GFX9-NEXT: v_mov_b32_e32 v3, v1 5899; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 5900; GFX9-NEXT: v_mov_b32_e32 v2, v0 5901; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 5902; GFX9-NEXT: s_cbranch_execnz .LBB9_4 5903; GFX9-NEXT: .LBB9_5: 5904; GFX9-NEXT: s_endpgm 5905; 5906; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 5907; GFX1064: ; %bb.0: 5908; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5909; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5910; GFX1064-NEXT: s_mov_b32 s38, -1 5911; GFX1064-NEXT: s_mov_b32 
s39, 0x31e16000 5912; GFX1064-NEXT: s_add_u32 s36, s36, s11 5913; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 5914; GFX1064-NEXT: s_addc_u32 s37, s37, 0 5915; GFX1064-NEXT: s_mov_b32 s12, s8 5916; GFX1064-NEXT: s_add_u32 s8, s34, 44 5917; GFX1064-NEXT: s_mov_b32 s13, s9 5918; GFX1064-NEXT: s_addc_u32 s9, s35, 0 5919; GFX1064-NEXT: s_getpc_b64 s[4:5] 5920; GFX1064-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 5921; GFX1064-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 5922; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 5923; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 5924; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 5925; GFX1064-NEXT: s_mov_b32 s14, s10 5926; GFX1064-NEXT: s_mov_b64 s[10:11], s[6:7] 5927; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] 5928; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 5929; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 5930; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] 5931; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] 5932; GFX1064-NEXT: s_mov_b32 s32, 0 5933; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 5934; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 5935; GFX1064-NEXT: v_mov_b32_e32 v2, 0 5936; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000 5937; GFX1064-NEXT: s_mov_b64 s[0:1], exec 5938; GFX1064-NEXT: .LBB9_1: ; %ComputeLoop 5939; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 5940; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1] 5941; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 5942; GFX1064-NEXT: v_readlane_b32 s3, v1, s4 5943; GFX1064-NEXT: v_readlane_b32 s2, v0, s4 5944; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 5945; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4 5946; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 5947; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 5948; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 5949; GFX1064-NEXT: s_cbranch_scc1 .LBB9_1 5950; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 5951; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 5952; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 5953; GFX1064-NEXT: v_cmp_eq_u32_e32 
vcc, 0, v0 5954; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 5955; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 5956; GFX1064-NEXT: s_cbranch_execz .LBB9_4 5957; GFX1064-NEXT: ; %bb.3: 5958; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 5959; GFX1064-NEXT: v_mov_b32_e32 v0, 0 5960; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 5961; GFX1064-NEXT: global_atomic_fmax_x2 v0, v[2:3], s[0:1] 5962; GFX1064-NEXT: .LBB9_4: 5963; GFX1064-NEXT: s_endpgm 5964; 5965; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 5966; GFX1032: ; %bb.0: 5967; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5968; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5969; GFX1032-NEXT: s_mov_b32 s38, -1 5970; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 5971; GFX1032-NEXT: s_add_u32 s36, s36, s11 5972; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 5973; GFX1032-NEXT: s_addc_u32 s37, s37, 0 5974; GFX1032-NEXT: s_mov_b32 s12, s8 5975; GFX1032-NEXT: s_add_u32 s8, s34, 44 5976; GFX1032-NEXT: s_mov_b32 s13, s9 5977; GFX1032-NEXT: s_addc_u32 s9, s35, 0 5978; GFX1032-NEXT: s_getpc_b64 s[4:5] 5979; GFX1032-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 5980; GFX1032-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 5981; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 5982; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 5983; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 5984; GFX1032-NEXT: s_mov_b32 s14, s10 5985; GFX1032-NEXT: s_mov_b64 s[10:11], s[6:7] 5986; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] 5987; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] 5988; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 5989; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] 5990; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] 5991; GFX1032-NEXT: s_mov_b32 s32, 0 5992; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 5993; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 5994; GFX1032-NEXT: v_mov_b32_e32 v2, 0 5995; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000 5996; GFX1032-NEXT: s_mov_b32 s0, exec_lo 5997; GFX1032-NEXT: .LBB9_1: ; 
%ComputeLoop 5998; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 5999; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 6000; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 6001; GFX1032-NEXT: v_readlane_b32 s3, v1, s1 6002; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 6003; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 6004; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 6005; GFX1032-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 6006; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 6007; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6008; GFX1032-NEXT: s_cbranch_scc1 .LBB9_1 6009; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 6010; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6011; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 6012; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 6013; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 6014; GFX1032-NEXT: s_cbranch_execz .LBB9_4 6015; GFX1032-NEXT: ; %bb.3: 6016; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 6017; GFX1032-NEXT: v_mov_b32_e32 v0, 0 6018; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 6019; GFX1032-NEXT: global_atomic_fmax_x2 v0, v[2:3], s[0:1] 6020; GFX1032-NEXT: .LBB9_4: 6021; GFX1032-NEXT: s_endpgm 6022; 6023; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6024; GFX1164: ; %bb.0: 6025; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 6026; GFX1164-NEXT: s_mov_b32 s12, s8 6027; GFX1164-NEXT: s_add_u32 s8, s34, 44 6028; GFX1164-NEXT: s_mov_b32 s13, s9 6029; GFX1164-NEXT: s_addc_u32 s9, s35, 0 6030; GFX1164-NEXT: s_getpc_b64 s[4:5] 6031; GFX1164-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6032; GFX1164-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6033; GFX1164-NEXT: v_mov_b32_e32 v31, v0 6034; GFX1164-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 6035; GFX1164-NEXT: s_mov_b32 s14, s10 6036; GFX1164-NEXT: s_mov_b64 s[10:11], s[6:7] 6037; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] 6038; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 6039; GFX1164-NEXT: s_mov_b32 s32, 0 6040; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 6041; GFX1164-NEXT: s_swappc_b64 
s[30:31], s[16:17] 6042; GFX1164-NEXT: v_mov_b32_e32 v4, 0 6043; GFX1164-NEXT: v_mov_b32_e32 v5, 0x7ff80000 6044; GFX1164-NEXT: s_mov_b64 s[0:1], exec 6045; GFX1164-NEXT: .LBB9_1: ; %ComputeLoop 6046; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 6047; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6048; GFX1164-NEXT: s_ctz_i32_b64 s4, s[0:1] 6049; GFX1164-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 6050; GFX1164-NEXT: v_readlane_b32 s3, v1, s4 6051; GFX1164-NEXT: v_readlane_b32 s2, v0, s4 6052; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 6053; GFX1164-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 6054; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4 6055; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 6056; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6057; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 6058; GFX1164-NEXT: v_max_f64 v[4:5], v[2:3], v[4:5] 6059; GFX1164-NEXT: s_cbranch_scc1 .LBB9_1 6060; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 6061; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6062; GFX1164-NEXT: s_mov_b64 s[0:1], exec 6063; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6064; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 6065; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 6066; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 6067; GFX1164-NEXT: s_cbranch_execz .LBB9_5 6068; GFX1164-NEXT: ; %bb.3: 6069; GFX1164-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 6070; GFX1164-NEXT: v_mov_b32_e32 v6, 0 6071; GFX1164-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6072; GFX1164-NEXT: s_mov_b64 s[2:3], 0 6073; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 6074; GFX1164-NEXT: global_load_b64 v[2:3], v6, s[0:1] 6075; GFX1164-NEXT: .LBB9_4: ; %atomicrmw.start 6076; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 6077; GFX1164-NEXT: s_waitcnt vmcnt(0) 6078; GFX1164-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 6079; GFX1164-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 6080; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 6081; GFX1164-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc 6082; GFX1164-NEXT: s_waitcnt vmcnt(0) 6083; GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6084; GFX1164-NEXT: v_mov_b32_e32 v3, v1 6085; GFX1164-NEXT: v_mov_b32_e32 v2, v0 6086; GFX1164-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 6087; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6088; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[2:3] 6089; GFX1164-NEXT: s_cbranch_execnz .LBB9_4 6090; GFX1164-NEXT: .LBB9_5: 6091; GFX1164-NEXT: s_endpgm 6092; 6093; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6094; GFX1132: ; %bb.0: 6095; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] 6096; GFX1132-NEXT: v_mov_b32_e32 v31, v0 6097; GFX1132-NEXT: s_add_u32 s8, s34, 44 6098; GFX1132-NEXT: s_addc_u32 s9, s35, 0 6099; GFX1132-NEXT: s_getpc_b64 s[4:5] 6100; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6101; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6102; GFX1132-NEXT: s_mov_b32 s12, s13 6103; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 6104; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] 6105; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] 6106; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 6107; GFX1132-NEXT: s_mov_b32 s13, s14 6108; GFX1132-NEXT: s_mov_b32 s14, s15 6109; GFX1132-NEXT: s_mov_b32 s32, 0 6110; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 6111; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] 6112; GFX1132-NEXT: v_mov_b32_e32 v4, 0 6113; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 6114; GFX1132-NEXT: s_mov_b32 s0, exec_lo 6115; GFX1132-NEXT: .LBB9_1: ; %ComputeLoop 6116; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 6117; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6118; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 6119; GFX1132-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 6120; GFX1132-NEXT: v_readlane_b32 s3, v1, s1 6121; GFX1132-NEXT: v_readlane_b32 
s2, v0, s1 6122; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 6123; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6124; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 6125; GFX1132-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 6126; GFX1132-NEXT: s_cmp_lg_u32 s0, 0 6127; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 6128; GFX1132-NEXT: v_max_f64 v[4:5], v[2:3], v[4:5] 6129; GFX1132-NEXT: s_cbranch_scc1 .LBB9_1 6130; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 6131; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6132; GFX1132-NEXT: s_mov_b32 s2, 0 6133; GFX1132-NEXT: s_mov_b32 s0, exec_lo 6134; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 6135; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 6136; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 6137; GFX1132-NEXT: s_cbranch_execz .LBB9_5 6138; GFX1132-NEXT: ; %bb.3: 6139; GFX1132-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 6140; GFX1132-NEXT: v_mov_b32_e32 v6, 0 6141; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) 6142; GFX1132-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6143; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 6144; GFX1132-NEXT: global_load_b64 v[2:3], v6, s[0:1] 6145; GFX1132-NEXT: .LBB9_4: ; %atomicrmw.start 6146; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 6147; GFX1132-NEXT: s_waitcnt vmcnt(0) 6148; GFX1132-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 6149; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 6150; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 6151; GFX1132-NEXT: global_atomic_cmpswap_b64 v[0:1], v6, v[0:3], s[0:1] glc 6152; GFX1132-NEXT: s_waitcnt vmcnt(0) 6153; GFX1132-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] 6154; GFX1132-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 6155; GFX1132-NEXT: s_or_b32 s2, vcc_lo, s2 6156; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6157; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 6158; GFX1132-NEXT: s_cbranch_execnz .LBB9_4 6159; GFX1132-NEXT: .LBB9_5: 6160; GFX1132-NEXT: s_endpgm 6161; 6162; GFX7LESS-DPP-LABEL: 
global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6163; GFX7LESS-DPP: ; %bb.0: 6164; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 6165; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 6166; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 6167; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 6168; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 6169; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 6170; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 6171; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 6172; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 6173; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 6174; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6175; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 6176; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 6177; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 6178; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 6179; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 6180; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] 6181; GFX7LESS-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6182; GFX7LESS-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6183; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 6184; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 6185; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 6186; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 6187; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 6188; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6189; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 6190; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] 6191; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] 6192; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 6193; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6194; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0 6195; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 6196; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] 6197; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start 6198; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 6199; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 6200; 
GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 6201; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 6202; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 6203; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v9, v3 6204; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2 6205; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1 6206; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0 6207; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc 6208; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 6209; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] 6210; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 6211; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v6 6212; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v7 6213; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[0:1] 6214; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_1 6215; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 6216; GFX7LESS-DPP-NEXT: s_endpgm 6217; 6218; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6219; GFX9-DPP: ; %bb.0: 6220; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 6221; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 6222; GFX9-DPP-NEXT: s_mov_b32 s38, -1 6223; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 6224; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 6225; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 6226; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 6227; GFX9-DPP-NEXT: s_mov_b32 s12, s8 6228; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 6229; GFX9-DPP-NEXT: s_mov_b32 s13, s9 6230; GFX9-DPP-NEXT: s_addc_u32 s9, s35, 0 6231; GFX9-DPP-NEXT: s_getpc_b64 s[4:5] 6232; GFX9-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6233; GFX9-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6234; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 6235; GFX9-DPP-NEXT: s_mov_b32 s14, s10 6236; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6237; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 6238; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 6239; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6240; GFX9-DPP-NEXT: s_mov_b64 s[6:7], 
s[2:3] 6241; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 6242; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 6243; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 6244; GFX9-DPP-NEXT: s_mov_b32 s32, 0 6245; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 6246; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6247; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 6248; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7ff80000 6249; GFX9-DPP-NEXT: v_cndmask_b32_e64 v6, v4, v1, s[0:1] 6250; GFX9-DPP-NEXT: v_cndmask_b32_e64 v5, 0, v0, s[0:1] 6251; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0x7ff80000 6252; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 6253; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 6254; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v6 row_shr:1 row_mask:0xf bank_mask:0xf 6255; GFX9-DPP-NEXT: v_mov_b32_dpp v7, v5 row_shr:1 row_mask:0xf bank_mask:0xf 6256; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6257; GFX9-DPP-NEXT: v_max_f64 v[7:8], v[7:8], v[7:8] 6258; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[7:8] 6259; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0x7ff80000 6260; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 6261; GFX9-DPP-NEXT: s_nop 0 6262; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v6 row_shr:2 row_mask:0xf bank_mask:0xf 6263; GFX9-DPP-NEXT: v_mov_b32_dpp v7, v5 row_shr:2 row_mask:0xf bank_mask:0xf 6264; GFX9-DPP-NEXT: v_max_f64 v[7:8], v[7:8], v[7:8] 6265; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[7:8] 6266; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0x7ff80000 6267; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 6268; GFX9-DPP-NEXT: s_nop 0 6269; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v6 row_shr:4 row_mask:0xf bank_mask:0xf 6270; GFX9-DPP-NEXT: v_mov_b32_dpp v7, v5 row_shr:4 row_mask:0xf bank_mask:0xf 6271; GFX9-DPP-NEXT: v_max_f64 v[7:8], v[7:8], v[7:8] 6272; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[7:8] 6273; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0x7ff80000 6274; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 6275; GFX9-DPP-NEXT: s_nop 0 6276; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v6 row_shr:8 row_mask:0xf bank_mask:0xf 6277; GFX9-DPP-NEXT: v_mov_b32_dpp v7, v5 row_shr:8 row_mask:0xf bank_mask:0xf 
6278; GFX9-DPP-NEXT: v_max_f64 v[7:8], v[7:8], v[7:8] 6279; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[7:8] 6280; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0x7ff80000 6281; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 6282; GFX9-DPP-NEXT: s_nop 0 6283; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v6 row_bcast:15 row_mask:0xa bank_mask:0xf 6284; GFX9-DPP-NEXT: v_mov_b32_dpp v7, v5 row_bcast:15 row_mask:0xa bank_mask:0xf 6285; GFX9-DPP-NEXT: v_max_f64 v[7:8], v[7:8], v[7:8] 6286; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[7:8] 6287; GFX9-DPP-NEXT: s_nop 1 6288; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v6 row_bcast:31 row_mask:0xc bank_mask:0xf 6289; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v5 row_bcast:31 row_mask:0xc bank_mask:0xf 6290; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[3:4] 6291; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 6292; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 6293; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 6294; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 6295; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 6296; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 6297; GFX9-DPP-NEXT: v_readlane_b32 s3, v4, 63 6298; GFX9-DPP-NEXT: v_readlane_b32 s2, v3, 63 6299; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 6300; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[2:3] 6301; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 6302; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc 6303; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3 6304; GFX9-DPP-NEXT: ; %bb.1: 6305; GFX9-DPP-NEXT: s_load_dwordx2 s[2:3], s[34:35], 0x24 6306; GFX9-DPP-NEXT: s_mov_b64 s[4:5], 0 6307; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 6308; GFX9-DPP-NEXT: global_load_dwordx2 v[11:12], v0, s[2:3] 6309; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start 6310; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 6311; GFX9-DPP-NEXT: v_max_f64 v[1:2], s[0:1], s[0:1] 6312; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 6313; GFX9-DPP-NEXT: v_max_f64 v[9:10], v[11:12], v[11:12] 6314; GFX9-DPP-NEXT: v_max_f64 v[9:10], v[9:10], v[1:2] 6315; GFX9-DPP-NEXT: global_atomic_cmpswap_x2 v[1:2], v0, v[9:12], s[2:3] 
glc 6316; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 6317; GFX9-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[1:2], v[11:12] 6318; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v2 6319; GFX9-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6320; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1 6321; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 6322; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2 6323; GFX9-DPP-NEXT: .LBB9_3: 6324; GFX9-DPP-NEXT: s_endpgm 6325; 6326; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6327; GFX1064-DPP: ; %bb.0: 6328; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 6329; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 6330; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 6331; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 6332; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 6333; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 6334; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 6335; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 6336; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 6337; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 6338; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 6339; GFX1064-DPP-NEXT: s_getpc_b64 s[4:5] 6340; GFX1064-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6341; GFX1064-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6342; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 6343; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 6344; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 6345; GFX1064-DPP-NEXT: s_mov_b32 s14, s10 6346; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6347; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6348; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 6349; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 6350; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 6351; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 6352; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 6353; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 6354; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6355; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 6356; GFX1064-DPP-NEXT: v_mov_b32_e32 
v4, 0x7ff80000 6357; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 6358; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v6, 0x7ff80000, v1, s[0:1] 6359; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v5, 0, v0, s[0:1] 6360; GFX1064-DPP-NEXT: v_mov_b32_dpp v4, v6 row_xmask:1 row_mask:0xf bank_mask:0xf 6361; GFX1064-DPP-NEXT: v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf 6362; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6363; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[3:4] 6364; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 6365; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6366; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0 6367; GFX1064-DPP-NEXT: v_mov_b32_dpp v6, v4 row_xmask:2 row_mask:0xf bank_mask:0xf 6368; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 6369; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6370; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6371; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6372; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0 6373; GFX1064-DPP-NEXT: v_mov_b32_dpp v6, v4 row_xmask:4 row_mask:0xf bank_mask:0xf 6374; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 6375; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6376; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6377; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6378; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 0 6379; GFX1064-DPP-NEXT: v_mov_b32_dpp v6, v4 row_xmask:8 row_mask:0xf bank_mask:0xf 6380; GFX1064-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 6381; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6382; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6383; GFX1064-DPP-NEXT: v_permlanex16_b32 v6, v4, 0, 0 6384; GFX1064-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 6385; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6386; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6387; GFX1064-DPP-NEXT: v_readlane_b32 s3, v4, 0 6388; GFX1064-DPP-NEXT: v_readlane_b32 s5, v4, 32 6389; 
GFX1064-DPP-NEXT: v_readlane_b32 s4, v3, 32 6390; GFX1064-DPP-NEXT: v_readlane_b32 s2, v3, 0 6391; GFX1064-DPP-NEXT: v_max_f64 v[3:4], s[4:5], s[4:5] 6392; GFX1064-DPP-NEXT: v_max_f64 v[5:6], s[2:3], s[2:3] 6393; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 6394; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 6395; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6396; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, 0 6397; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v7, exec_hi, v0 6398; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, v3 6399; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, v4 6400; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6401; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 6402; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_2 6403; GFX1064-DPP-NEXT: ; %bb.1: 6404; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 6405; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 6406; GFX1064-DPP-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] 6407; GFX1064-DPP-NEXT: .LBB9_2: 6408; GFX1064-DPP-NEXT: s_endpgm 6409; 6410; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6411; GFX1032-DPP: ; %bb.0: 6412; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 6413; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 6414; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 6415; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 6416; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 6417; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 6418; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 6419; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 6420; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 6421; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 6422; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 6423; GFX1032-DPP-NEXT: s_getpc_b64 s[4:5] 6424; GFX1032-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6425; GFX1032-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6426; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 6427; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 6428; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 
v1, 10, v1 6429; GFX1032-DPP-NEXT: s_mov_b32 s14, s10 6430; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6431; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6432; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 6433; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 6434; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] 6435; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] 6436; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 6437; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 6438; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6439; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 6440; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0x7ff80000 6441; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 6442; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v6, 0x7ff80000, v1, s0 6443; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v5, 0, v0, s0 6444; GFX1032-DPP-NEXT: v_mov_b32_dpp v4, v6 row_xmask:1 row_mask:0xf bank_mask:0xf 6445; GFX1032-DPP-NEXT: v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf 6446; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6447; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[3:4] 6448; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 6449; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6450; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0 6451; GFX1032-DPP-NEXT: v_mov_b32_dpp v6, v4 row_xmask:2 row_mask:0xf bank_mask:0xf 6452; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 6453; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6454; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6455; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6456; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0 6457; GFX1032-DPP-NEXT: v_mov_b32_dpp v6, v4 row_xmask:4 row_mask:0xf bank_mask:0xf 6458; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 6459; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6460; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6461; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0x7ff80000 6462; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 0 6463; GFX1032-DPP-NEXT: v_mov_b32_dpp v6, 
v4 row_xmask:8 row_mask:0xf bank_mask:0xf 6464; GFX1032-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 6465; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6466; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6467; GFX1032-DPP-NEXT: v_permlanex16_b32 v6, v4, 0, 0 6468; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 6469; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] 6470; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] 6471; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 6472; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 6473; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0 6474; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0 6475; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 6476; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 6477; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 6478; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_2 6479; GFX1032-DPP-NEXT: ; %bb.1: 6480; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[34:35], 0x24 6481; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 6482; GFX1032-DPP-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] 6483; GFX1032-DPP-NEXT: .LBB9_2: 6484; GFX1032-DPP-NEXT: s_endpgm 6485; 6486; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6487; GFX1164-DPP: ; %bb.0: 6488; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 6489; GFX1164-DPP-NEXT: s_mov_b32 s12, s8 6490; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 6491; GFX1164-DPP-NEXT: s_mov_b32 s13, s9 6492; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 6493; GFX1164-DPP-NEXT: s_getpc_b64 s[4:5] 6494; GFX1164-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6495; GFX1164-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6496; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 6497; GFX1164-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 6498; GFX1164-DPP-NEXT: s_mov_b32 s14, s10 6499; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6500; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6501; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 6502; GFX1164-DPP-NEXT: 
s_mov_b32 s32, 0 6503; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 6504; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6505; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 6506; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0x7ff80000 6507; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, 0 6508; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s[0:1] 6509; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v4, 0, v0, s[0:1] 6510; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6511; GFX1164-DPP-NEXT: v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf 6512; GFX1164-DPP-NEXT: v_mov_b32_dpp v2, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 6513; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6514; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 6515; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 6516; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] 6517; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 0x7ff80000 6518; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 6519; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6520; GFX1164-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 6521; GFX1164-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:2 row_mask:0xf bank_mask:0xf 6522; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6523; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6524; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6525; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 0x7ff80000 6526; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 6527; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6528; GFX1164-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 6529; GFX1164-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:4 row_mask:0xf bank_mask:0xf 6530; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6531; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], 
v[4:5] 6532; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6533; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 0x7ff80000 6534; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 6535; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6536; GFX1164-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 6537; GFX1164-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:8 row_mask:0xf bank_mask:0xf 6538; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6539; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6540; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6541; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6542; GFX1164-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 6543; GFX1164-DPP-NEXT: v_permlanex16_b32 v4, v2, 0, 0 6544; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6545; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6546; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6547; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6548; GFX1164-DPP-NEXT: v_permlane64_b32 v5, v3 6549; GFX1164-DPP-NEXT: v_permlane64_b32 v4, v2 6550; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6551; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6552; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6553; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 6554; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 6555; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6556; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 6557; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 6558; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0 6559; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6560; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2 6561; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, v3 6562; 
GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v6 6563; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3 6564; GFX1164-DPP-NEXT: ; %bb.1: 6565; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 6566; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 6567; GFX1164-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 6568; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], 0 6569; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 6570; GFX1164-DPP-NEXT: global_load_b64 v[8:9], v10, s[0:1] 6571; GFX1164-DPP-NEXT: .LBB9_2: ; %atomicrmw.start 6572; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 6573; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 6574; GFX1164-DPP-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] 6575; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 6576; GFX1164-DPP-NEXT: v_max_f64 v[6:7], v[6:7], v[0:1] 6577; GFX1164-DPP-NEXT: global_atomic_cmpswap_b64 v[6:7], v10, v[6:9], s[0:1] glc 6578; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 6579; GFX1164-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[8:9] 6580; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, v7 6581; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, v6 6582; GFX1164-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 6583; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6584; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[2:3] 6585; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2 6586; GFX1164-DPP-NEXT: .LBB9_3: 6587; GFX1164-DPP-NEXT: s_endpgm 6588; 6589; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: 6590; GFX1132-DPP: ; %bb.0: 6591; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 6592; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 6593; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 6594; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 6595; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] 6596; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 6597; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 6598; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 6599; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 6600; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] 6601; 
GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] 6602; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 6603; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 6604; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 6605; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 6606; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 6607; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 6608; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 6609; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 6610; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 6611; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v4, 0, v0, s0 6612; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6613; GFX1132-DPP-NEXT: v_mov_b32_dpp v3, v5 row_xmask:1 row_mask:0xf bank_mask:0xf 6614; GFX1132-DPP-NEXT: v_mov_b32_dpp v2, v4 row_xmask:1 row_mask:0xf bank_mask:0xf 6615; GFX1132-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6616; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 6617; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 6618; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] 6619; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0 6620; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6621; GFX1132-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:2 row_mask:0xf bank_mask:0xf 6622; GFX1132-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:2 row_mask:0xf bank_mask:0xf 6623; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6624; GFX1132-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6625; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6626; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0 6627; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6628; GFX1132-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:4 row_mask:0xf bank_mask:0xf 6629; GFX1132-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:4 row_mask:0xf bank_mask:0xf 6630; 
GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6631; GFX1132-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6632; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6633; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 0x7ff80000 :: v_dual_mov_b32 v4, 0 6634; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6635; GFX1132-DPP-NEXT: v_mov_b32_dpp v5, v3 row_xmask:8 row_mask:0xf bank_mask:0xf 6636; GFX1132-DPP-NEXT: v_mov_b32_dpp v4, v2 row_xmask:8 row_mask:0xf bank_mask:0xf 6637; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6638; GFX1132-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6639; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6640; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 6641; GFX1132-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 6642; GFX1132-DPP-NEXT: v_permlanex16_b32 v4, v2, 0, 0 6643; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6644; GFX1132-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 6645; GFX1132-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 6646; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 6647; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6648; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, v2 6649; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v6, exec_lo, 0 6650; GFX1132-DPP-NEXT: v_dual_mov_b32 v10, 0 :: v_dual_mov_b32 v1, v3 6651; GFX1132-DPP-NEXT: s_mov_b32 s2, 0 6652; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 6653; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 6654; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v6 6655; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3 6656; GFX1132-DPP-NEXT: ; %bb.1: 6657; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[34:35], 0x24 6658; GFX1132-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 6659; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 6660; GFX1132-DPP-NEXT: global_load_b64 v[8:9], v10, s[0:1] 6661; GFX1132-DPP-NEXT: 
.LBB9_2: ; %atomicrmw.start 6662; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 6663; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 6664; GFX1132-DPP-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] 6665; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 6666; GFX1132-DPP-NEXT: v_max_f64 v[6:7], v[6:7], v[0:1] 6667; GFX1132-DPP-NEXT: global_atomic_cmpswap_b64 v[6:7], v10, v[6:9], s[0:1] glc 6668; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 6669; GFX1132-DPP-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[8:9] 6670; GFX1132-DPP-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 6671; GFX1132-DPP-NEXT: s_or_b32 s2, vcc_lo, s2 6672; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6673; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 6674; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2 6675; GFX1132-DPP-NEXT: .LBB9_3: 6676; GFX1132-DPP-NEXT: s_endpgm 6677 %divValue = call double @div.double.value() 6678 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue syncscope("one-as") monotonic, !amdgpu.no.fine.grained.memory !1 6679 ret void 6680} 6681 6682define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 { 6683; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 6684; GFX7LESS: ; %bb.0: 6685; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 6686; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 6687; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 6688; GFX7LESS-NEXT: s_mov_b32 s50, -1 6689; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 6690; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 6691; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 6692; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] 6693; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 6694; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 6695; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 6696; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 6697; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3 6698; GFX7LESS-NEXT: ; %bb.1: 6699; GFX7LESS-NEXT: s_mov_b32 
s33, s10 6700; GFX7LESS-NEXT: s_mov_b32 s42, s9 6701; GFX7LESS-NEXT: s_mov_b32 s43, s8 6702; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] 6703; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] 6704; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] 6705; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 6706; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 6707; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 6708; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 6709; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 6710; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 6711; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 6712; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 6713; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0 6714; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1 6715; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2 6716; GFX7LESS-NEXT: .LBB10_2: ; %atomicrmw.start 6717; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 6718; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 6719; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 6720; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 6721; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 6722; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 6723; GFX7LESS-NEXT: s_waitcnt expcnt(0) 6724; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 6725; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 6726; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 6727; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 6728; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 6729; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 6730; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 6731; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 6732; GFX7LESS-NEXT: s_waitcnt expcnt(0) 6733; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 6734; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 6735; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0 6736; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 6737; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 6738; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 6739; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 
6740; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] 6741; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 6742; GFX7LESS-NEXT: s_mov_b32 s12, s43 6743; GFX7LESS-NEXT: s_mov_b32 s13, s42 6744; GFX7LESS-NEXT: s_mov_b32 s14, s33 6745; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 6746; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 6747; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 6748; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 6749; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 6750; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 6751; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 6752; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 6753; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 6754; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 6755; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 6756; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 6757; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] 6758; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2 6759; GFX7LESS-NEXT: .LBB10_3: 6760; GFX7LESS-NEXT: s_endpgm 6761; 6762; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 6763; GFX9: ; %bb.0: 6764; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 6765; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 6766; GFX9-NEXT: s_mov_b32 s50, -1 6767; GFX9-NEXT: s_mov_b32 s51, 0xe00000 6768; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 6769; GFX9-NEXT: s_add_u32 s48, s48, s11 6770; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 6771; GFX9-NEXT: s_addc_u32 s49, s49, 0 6772; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] 6773; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 6774; GFX9-NEXT: s_movk_i32 s32, 0x800 6775; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 6776; GFX9-NEXT: s_cbranch_execz .LBB10_3 6777; GFX9-NEXT: ; %bb.1: 6778; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 6779; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 6780; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 6781; GFX9-NEXT: s_mov_b32 s33, s10 6782; GFX9-NEXT: s_mov_b32 s42, s9 6783; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6784; GFX9-NEXT: s_load_dwordx2 s[0:1], 
s[44:45], 0x0 6785; GFX9-NEXT: s_mov_b32 s43, s8 6786; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] 6787; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] 6788; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] 6789; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6790; GFX9-NEXT: v_mov_b32_e32 v2, s1 6791; GFX9-NEXT: s_mov_b64 s[46:47], 0 6792; GFX9-NEXT: v_mov_b32_e32 v1, s0 6793; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 6794; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start 6795; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6796; GFX9-NEXT: s_waitcnt vmcnt(0) 6797; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 6798; GFX9-NEXT: s_add_u32 s8, s36, 44 6799; GFX9-NEXT: s_addc_u32 s9, s37, 0 6800; GFX9-NEXT: s_getpc_b64 s[0:1] 6801; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 6802; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 6803; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 6804; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 6805; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 6806; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 6807; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 6808; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 6809; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] 6810; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 6811; GFX9-NEXT: s_mov_b32 s12, s43 6812; GFX9-NEXT: s_mov_b32 s13, s42 6813; GFX9-NEXT: s_mov_b32 s14, s33 6814; GFX9-NEXT: v_mov_b32_e32 v31, v40 6815; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 6816; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 6817; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 6818; GFX9-NEXT: v_mov_b32_e32 v0, 8 6819; GFX9-NEXT: v_mov_b32_e32 v1, 0 6820; GFX9-NEXT: v_mov_b32_e32 v2, s44 6821; GFX9-NEXT: v_mov_b32_e32 v3, s45 6822; GFX9-NEXT: v_mov_b32_e32 v4, 0 6823; GFX9-NEXT: v_mov_b32_e32 v5, 8 6824; GFX9-NEXT: v_mov_b32_e32 v6, 0 6825; GFX9-NEXT: v_mov_b32_e32 v7, 0 6826; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6827; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 6828; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 
0 6829; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 6830; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 6831; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 6832; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 6833; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] 6834; GFX9-NEXT: s_cbranch_execnz .LBB10_2 6835; GFX9-NEXT: .LBB10_3: 6836; GFX9-NEXT: s_endpgm 6837; 6838; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 6839; GFX1064: ; %bb.0: 6840; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 6841; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 6842; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 6843; GFX1064-NEXT: s_mov_b32 s50, -1 6844; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 6845; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 6846; GFX1064-NEXT: s_add_u32 s48, s48, s11 6847; GFX1064-NEXT: s_addc_u32 s49, s49, 0 6848; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] 6849; GFX1064-NEXT: s_movk_i32 s32, 0x800 6850; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 6851; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 6852; GFX1064-NEXT: s_cbranch_execz .LBB10_3 6853; GFX1064-NEXT: ; %bb.1: 6854; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 6855; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 6856; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 6857; GFX1064-NEXT: s_mov_b32 s33, s10 6858; GFX1064-NEXT: s_mov_b32 s42, s9 6859; GFX1064-NEXT: s_mov_b32 s43, s8 6860; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] 6861; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 6862; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] 6863; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] 6864; GFX1064-NEXT: s_mov_b64 s[46:47], 0 6865; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 6866; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 6867; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 6868; GFX1064-NEXT: v_mov_b32_e32 v2, s1 6869; GFX1064-NEXT: v_mov_b32_e32 v1, s0 6870; GFX1064-NEXT: .LBB10_2: ; %atomicrmw.start 6871; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 6872; GFX1064-NEXT: s_waitcnt vmcnt(0) 6873; 
GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 6874; GFX1064-NEXT: s_add_u32 s8, s36, 44 6875; GFX1064-NEXT: s_addc_u32 s9, s37, 0 6876; GFX1064-NEXT: s_getpc_b64 s[0:1] 6877; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 6878; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 6879; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 6880; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 6881; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 6882; GFX1064-NEXT: v_mov_b32_e32 v31, v40 6883; GFX1064-NEXT: v_mov_b32_e32 v0, 8 6884; GFX1064-NEXT: v_mov_b32_e32 v1, 0 6885; GFX1064-NEXT: v_mov_b32_e32 v2, s44 6886; GFX1064-NEXT: v_mov_b32_e32 v5, 8 6887; GFX1064-NEXT: v_mov_b32_e32 v6, 0 6888; GFX1064-NEXT: v_mov_b32_e32 v7, 0 6889; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 6890; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 6891; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] 6892; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] 6893; GFX1064-NEXT: s_mov_b32 s12, s43 6894; GFX1064-NEXT: s_mov_b32 s13, s42 6895; GFX1064-NEXT: s_mov_b32 s14, s33 6896; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 6897; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 6898; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 6899; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 6900; GFX1064-NEXT: v_mov_b32_e32 v3, s45 6901; GFX1064-NEXT: v_mov_b32_e32 v4, 0 6902; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 6903; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 6904; GFX1064-NEXT: s_clause 0x1 6905; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 6906; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 6907; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 6908; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 6909; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 6910; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] 6911; GFX1064-NEXT: s_cbranch_execnz .LBB10_2 6912; GFX1064-NEXT: .LBB10_3: 6913; GFX1064-NEXT: s_endpgm 6914; 6915; 
GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 6916; GFX1032: ; %bb.0: 6917; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 6918; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 6919; GFX1032-NEXT: s_mov_b32 s50, -1 6920; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 6921; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 6922; GFX1032-NEXT: s_add_u32 s48, s48, s11 6923; GFX1032-NEXT: s_addc_u32 s49, s49, 0 6924; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] 6925; GFX1032-NEXT: s_mov_b32 s46, 0 6926; GFX1032-NEXT: s_movk_i32 s32, 0x400 6927; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 6928; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 6929; GFX1032-NEXT: s_cbranch_execz .LBB10_3 6930; GFX1032-NEXT: ; %bb.1: 6931; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 6932; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 6933; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 6934; GFX1032-NEXT: s_mov_b32 s33, s10 6935; GFX1032-NEXT: s_mov_b32 s42, s9 6936; GFX1032-NEXT: s_mov_b32 s43, s8 6937; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] 6938; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 6939; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] 6940; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] 6941; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 6942; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 6943; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 6944; GFX1032-NEXT: v_mov_b32_e32 v2, s1 6945; GFX1032-NEXT: v_mov_b32_e32 v1, s0 6946; GFX1032-NEXT: .LBB10_2: ; %atomicrmw.start 6947; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 6948; GFX1032-NEXT: s_waitcnt vmcnt(0) 6949; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 6950; GFX1032-NEXT: s_add_u32 s8, s36, 44 6951; GFX1032-NEXT: s_addc_u32 s9, s37, 0 6952; GFX1032-NEXT: s_getpc_b64 s[0:1] 6953; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 6954; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 6955; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 6956; GFX1032-NEXT: 
buffer_store_dword v1, off, s[48:51], 0 6957; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 6958; GFX1032-NEXT: v_mov_b32_e32 v31, v40 6959; GFX1032-NEXT: v_mov_b32_e32 v0, 8 6960; GFX1032-NEXT: v_mov_b32_e32 v1, 0 6961; GFX1032-NEXT: v_mov_b32_e32 v2, s44 6962; GFX1032-NEXT: v_mov_b32_e32 v5, 8 6963; GFX1032-NEXT: v_mov_b32_e32 v6, 0 6964; GFX1032-NEXT: v_mov_b32_e32 v7, 0 6965; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 6966; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 6967; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] 6968; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] 6969; GFX1032-NEXT: s_mov_b32 s12, s43 6970; GFX1032-NEXT: s_mov_b32 s13, s42 6971; GFX1032-NEXT: s_mov_b32 s14, s33 6972; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 6973; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 6974; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 6975; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 6976; GFX1032-NEXT: v_mov_b32_e32 v3, s45 6977; GFX1032-NEXT: v_mov_b32_e32 v4, 0 6978; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 6979; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 6980; GFX1032-NEXT: s_clause 0x1 6981; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 6982; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 6983; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 6984; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 6985; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 6986; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 6987; GFX1032-NEXT: s_cbranch_execnz .LBB10_2 6988; GFX1032-NEXT: .LBB10_3: 6989; GFX1032-NEXT: s_endpgm 6990; 6991; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 6992; GFX1164: ; %bb.0: 6993; GFX1164-NEXT: v_mov_b32_e32 v40, v0 6994; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 6995; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] 6996; GFX1164-NEXT: s_mov_b32 s32, 32 6997; GFX1164-NEXT: s_mov_b64 s[0:1], exec 6998; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
6999; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 7000; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 7001; GFX1164-NEXT: s_cbranch_execz .LBB10_3 7002; GFX1164-NEXT: ; %bb.1: 7003; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 7004; GFX1164-NEXT: s_mov_b32 s33, s10 7005; GFX1164-NEXT: s_mov_b32 s42, s9 7006; GFX1164-NEXT: s_mov_b32 s43, s8 7007; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] 7008; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] 7009; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] 7010; GFX1164-NEXT: s_mov_b64 s[46:47], 0 7011; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 7012; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 7013; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 7014; GFX1164-NEXT: v_mov_b32_e32 v2, s1 7015; GFX1164-NEXT: v_mov_b32_e32 v1, s0 7016; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 7017; GFX1164-NEXT: .p2align 6 7018; GFX1164-NEXT: .LBB10_2: ; %atomicrmw.start 7019; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 7020; GFX1164-NEXT: s_waitcnt vmcnt(0) 7021; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) 7022; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7023; GFX1164-NEXT: s_add_u32 s8, s36, 44 7024; GFX1164-NEXT: s_addc_u32 s9, s37, 0 7025; GFX1164-NEXT: s_getpc_b64 s[0:1] 7026; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7027; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7028; GFX1164-NEXT: v_mov_b32_e32 v31, v40 7029; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 7030; GFX1164-NEXT: v_mov_b32_e32 v0, 8 7031; GFX1164-NEXT: v_mov_b32_e32 v5, 8 7032; GFX1164-NEXT: v_mov_b32_e32 v6, 0 7033; GFX1164-NEXT: v_mov_b32_e32 v7, 0 7034; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 7035; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] 7036; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] 7037; GFX1164-NEXT: s_mov_b32 s12, s43 7038; GFX1164-NEXT: s_mov_b32 s13, s42 7039; GFX1164-NEXT: s_mov_b32 s14, s33 7040; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7041; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off 7042; GFX1164-NEXT: 
v_mov_b32_e32 v1, 0 7043; GFX1164-NEXT: v_mov_b32_e32 v2, s44 7044; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 7045; GFX1164-NEXT: v_mov_b32_e32 v3, s45 7046; GFX1164-NEXT: v_mov_b32_e32 v4, 0 7047; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 7048; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 7049; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off 7050; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 7051; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 7052; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7053; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7054; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] 7055; GFX1164-NEXT: s_cbranch_execnz .LBB10_2 7056; GFX1164-NEXT: .LBB10_3: 7057; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 7058; GFX1164-NEXT: s_endpgm 7059; 7060; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7061; GFX1132: ; %bb.0: 7062; GFX1132-NEXT: v_mov_b32_e32 v40, v0 7063; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7064; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] 7065; GFX1132-NEXT: s_mov_b32 s46, 0 7066; GFX1132-NEXT: s_mov_b32 s32, 32 7067; GFX1132-NEXT: s_mov_b32 s0, exec_lo 7068; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 7069; GFX1132-NEXT: s_cbranch_execz .LBB10_3 7070; GFX1132-NEXT: ; %bb.1: 7071; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 7072; GFX1132-NEXT: s_mov_b32 s33, s15 7073; GFX1132-NEXT: s_mov_b32 s42, s14 7074; GFX1132-NEXT: s_mov_b32 s43, s13 7075; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] 7076; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] 7077; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] 7078; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 7079; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 7080; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 7081; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 7082; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 7083; GFX1132-NEXT: .p2align 6 7084; GFX1132-NEXT: .LBB10_2: ; %atomicrmw.start 7085; GFX1132-NEXT: ; =>This Inner Loop 
Header: Depth=1 7086; GFX1132-NEXT: s_waitcnt vmcnt(0) 7087; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 7088; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7089; GFX1132-NEXT: s_add_u32 s8, s36, 44 7090; GFX1132-NEXT: s_addc_u32 s9, s37, 0 7091; GFX1132-NEXT: s_getpc_b64 s[0:1] 7092; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7093; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7094; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 7095; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 7096; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 7097; GFX1132-NEXT: v_mov_b32_e32 v7, 0 7098; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 7099; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] 7100; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] 7101; GFX1132-NEXT: s_mov_b32 s12, s43 7102; GFX1132-NEXT: s_mov_b32 s13, s42 7103; GFX1132-NEXT: s_mov_b32 s14, s33 7104; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) 7105; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7106; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off 7107; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 7108; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 7109; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 7110; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 7111; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] 7112; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off 7113; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 7114; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 7115; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 7116; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 7117; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 7118; GFX1132-NEXT: s_cbranch_execnz .LBB10_2 7119; GFX1132-NEXT: .LBB10_3: 7120; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 7121; GFX1132-NEXT: s_endpgm 7122; 7123; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7124; 
GFX7LESS-DPP: ; %bb.0: 7125; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 7126; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7127; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7128; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 7129; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 7130; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 7131; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 7132; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7133; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 7134; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 7135; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 7136; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 7137; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3 7138; GFX7LESS-DPP-NEXT: ; %bb.1: 7139; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 7140; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 7141; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 7142; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7143; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7144; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7145; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 7146; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 7147; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 7148; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 7149; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 7150; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 7151; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 7152; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 7153; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0 7154; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1 7155; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2 7156; GFX7LESS-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7157; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7158; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 7159; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 7160; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 7161; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 7162; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 
44 7163; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 7164; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 7165; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 7166; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 7167; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7168; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7169; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 7170; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 7171; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7172; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 7173; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 7174; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0 7175; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0 7176; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 7177; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 7178; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 7179; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7180; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7181; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7182; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 7183; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 7184; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 7185; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 7186; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 7187; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 7188; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 7189; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 7190; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 7191; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 7192; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 7193; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 7194; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 7195; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 7196; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7197; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 7198; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2 7199; GFX7LESS-DPP-NEXT: .LBB10_3: 7200; GFX7LESS-DPP-NEXT: 
s_endpgm 7201; 7202; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7203; GFX9-DPP: ; %bb.0: 7204; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7205; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7206; GFX9-DPP-NEXT: s_mov_b32 s50, -1 7207; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 7208; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 7209; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 7210; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 7211; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 7212; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7213; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 7214; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 7215; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 7216; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 7217; GFX9-DPP-NEXT: ; %bb.1: 7218; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 7219; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 7220; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 7221; GFX9-DPP-NEXT: s_mov_b32 s33, s10 7222; GFX9-DPP-NEXT: s_mov_b32 s42, s9 7223; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 7224; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 7225; GFX9-DPP-NEXT: s_mov_b32 s43, s8 7226; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7227; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7228; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7229; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 7230; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 7231; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 7232; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 7233; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 7234; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7235; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7236; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 7237; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7238; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 7239; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 7240; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 7241; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7242; GFX9-DPP-NEXT: s_addc_u32 s1, s1, 
__atomic_compare_exchange@gotpcrel32@hi+12 7243; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7244; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 7245; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 7246; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 7247; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7248; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7249; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7250; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7251; GFX9-DPP-NEXT: s_mov_b32 s12, s43 7252; GFX9-DPP-NEXT: s_mov_b32 s13, s42 7253; GFX9-DPP-NEXT: s_mov_b32 s14, s33 7254; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 7255; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 7256; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 7257; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 7258; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 7259; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 7260; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 7261; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 7262; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 7263; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 7264; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 7265; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 7266; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 7267; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 7268; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 7269; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 7270; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 7271; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7272; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7273; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 7274; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 7275; GFX9-DPP-NEXT: .LBB10_3: 7276; GFX9-DPP-NEXT: s_endpgm 7277; 7278; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7279; GFX1064-DPP: ; %bb.0: 7280; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 7281; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7282; GFX1064-DPP-NEXT: s_mov_b32 s49, 
SCRATCH_RSRC_DWORD1 7283; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 7284; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 7285; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 7286; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 7287; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 7288; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7289; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 7290; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 7291; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 7292; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 7293; GFX1064-DPP-NEXT: ; %bb.1: 7294; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 7295; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 7296; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 7297; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 7298; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 7299; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 7300; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7301; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 7302; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7303; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7304; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 7305; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 7306; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 7307; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 7308; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 7309; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 7310; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7311; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7312; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) 7313; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7314; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44 7315; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0 7316; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 7317; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7318; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7319; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 7320; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 7321; 
GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7322; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 7323; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 7324; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 7325; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 7326; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 7327; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 7328; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 7329; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 7330; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7331; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7332; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7333; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 7334; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 7335; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 7336; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 7337; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7338; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 7339; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 7340; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 7341; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 7342; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 7343; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 7344; GFX1064-DPP-NEXT: s_clause 0x1 7345; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 7346; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 7347; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 7348; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7349; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7350; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 7351; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 7352; GFX1064-DPP-NEXT: .LBB10_3: 7353; GFX1064-DPP-NEXT: s_endpgm 7354; 7355; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7356; GFX1032-DPP: ; %bb.0: 7357; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7358; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7359; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 7360; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 
7361; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 7362; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 7363; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 7364; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7365; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 7366; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 7367; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 7368; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 7369; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 7370; GFX1032-DPP-NEXT: ; %bb.1: 7371; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 7372; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 7373; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 7374; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 7375; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 7376; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 7377; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7378; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 7379; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7380; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7381; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 7382; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 7383; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 7384; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 7385; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 7386; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7387; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7388; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) 7389; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7390; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44 7391; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0 7392; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 7393; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7394; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7395; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 7396; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 7397; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7398; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 7399; GFX1032-DPP-NEXT: 
v_mov_b32_e32 v0, 8 7400; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 7401; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 7402; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 7403; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 7404; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 7405; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 7406; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7407; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7408; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7409; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 7410; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 7411; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 7412; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 7413; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7414; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 7415; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 7416; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 7417; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 7418; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 7419; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 7420; GFX1032-DPP-NEXT: s_clause 0x1 7421; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 7422; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 7423; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 7424; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 7425; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 7426; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 7427; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 7428; GFX1032-DPP-NEXT: .LBB10_3: 7429; GFX1032-DPP-NEXT: s_endpgm 7430; 7431; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7432; GFX1164-DPP: ; %bb.0: 7433; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 7434; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7435; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7436; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 7437; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 7438; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7439; 
GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 7440; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 7441; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 7442; GFX1164-DPP-NEXT: ; %bb.1: 7443; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 7444; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 7445; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 7446; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 7447; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7448; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7449; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7450; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 7451; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 7452; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 7453; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 7454; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 7455; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 7456; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 7457; GFX1164-DPP-NEXT: .p2align 6 7458; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7459; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7460; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 7461; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 7462; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7463; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44 7464; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0 7465; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 7466; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7467; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7468; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 7469; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 7470; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 7471; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 7472; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 7473; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 7474; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7475; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7476; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7477; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 7478; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 7479; 
GFX1164-DPP-NEXT: s_mov_b32 s14, s33 7480; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7481; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off 7482; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 7483; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 7484; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 7485; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 7486; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 7487; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 7488; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 7489; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off 7490; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 7491; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 7492; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7493; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7494; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] 7495; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 7496; GFX1164-DPP-NEXT: .LBB10_3: 7497; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 7498; GFX1164-DPP-NEXT: s_endpgm 7499; 7500; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: 7501; GFX1132-DPP: ; %bb.0: 7502; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 7503; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7504; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 7505; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 7506; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 7507; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 7508; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 7509; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 7510; GFX1132-DPP-NEXT: ; %bb.1: 7511; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 7512; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 7513; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 7514; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 7515; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 7516; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 7517; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 7518; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 7519; GFX1132-DPP-NEXT: 
s_load_b64 s[0:1], s[44:45], 0x0 7520; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 7521; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 7522; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 7523; GFX1132-DPP-NEXT: .p2align 6 7524; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start 7525; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 7526; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 7527; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 7528; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 7529; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44 7530; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0 7531; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 7532; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7533; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7534; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 7535; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 7536; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 7537; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 7538; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 7539; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 7540; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 7541; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 7542; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 7543; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 7544; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) 7545; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 7546; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off 7547; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 7548; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 7549; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 7550; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 7551; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 7552; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off 7553; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 7554; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(SALU_CYCLE_1) 7555; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 7556; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 7557; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 7558; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 7559; GFX1132-DPP-NEXT: .LBB10_3: 7560; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 7561; GFX1132-DPP-NEXT: s_endpgm 7562 %result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1 7563 ret void 7564} 7565 7566define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 { 7567; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 7568; GFX7LESS: ; %bb.0: 7569; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 7570; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7571; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7572; GFX7LESS-NEXT: s_mov_b32 s50, -1 7573; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 7574; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 7575; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 7576; GFX7LESS-NEXT: s_mov_b32 s33, s10 7577; GFX7LESS-NEXT: s_mov_b32 s42, s9 7578; GFX7LESS-NEXT: s_mov_b32 s43, s8 7579; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] 7580; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] 7581; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] 7582; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] 7583; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 7584; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 7585; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 7586; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 7587; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 7588; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7589; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 7590; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 7591; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 7592; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 7593; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 7594; GFX7LESS-NEXT: s_mov_b64 s[6:7], 
s[2:3] 7595; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 7596; GFX7LESS-NEXT: s_mov_b32 s12, s43 7597; GFX7LESS-NEXT: s_mov_b32 s13, s42 7598; GFX7LESS-NEXT: s_mov_b32 s14, s33 7599; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 7600; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 7601; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 7602; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 7603; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 7604; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec 7605; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 7606; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000 7607; GFX7LESS-NEXT: .LBB11_1: ; %ComputeLoop 7608; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 7609; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1] 7610; GFX7LESS-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 7611; GFX7LESS-NEXT: v_readlane_b32 s3, v1, s4 7612; GFX7LESS-NEXT: v_readlane_b32 s2, v0, s4 7613; GFX7LESS-NEXT: s_lshl_b64 s[4:5], 1, s4 7614; GFX7LESS-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 7615; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 7616; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[2:3], s[0:1], 0 7617; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[2:3] 7618; GFX7LESS-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 7619; GFX7LESS-NEXT: s_cbranch_vccnz .LBB11_1 7620; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd 7621; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 7622; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 7623; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 7624; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 7625; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 7626; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5 7627; GFX7LESS-NEXT: ; %bb.3: 7628; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 7629; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 7630; GFX7LESS-NEXT: s_mov_b32 s46, -1 7631; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 7632; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 7633; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 7634; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 7635; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start 
7636; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 7637; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 7638; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] 7639; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 7640; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 7641; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 7642; GFX7LESS-NEXT: s_waitcnt expcnt(0) 7643; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] 7644; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 7645; GFX7LESS-NEXT: s_getpc_b64 s[0:1] 7646; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7647; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7648; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 7649; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 7650; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7651; GFX7LESS-NEXT: s_waitcnt expcnt(0) 7652; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 7653; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 7654; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0 7655; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 7656; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 7657; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 7658; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] 7659; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] 7660; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] 7661; GFX7LESS-NEXT: s_mov_b32 s12, s43 7662; GFX7LESS-NEXT: s_mov_b32 s13, s42 7663; GFX7LESS-NEXT: s_mov_b32 s14, s33 7664; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 7665; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] 7666; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] 7667; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 7668; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 7669; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 7670; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] 7671; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 7672; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 7673; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 7674; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 7675; GFX7LESS-NEXT: s_or_b64 
s[46:47], vcc, s[46:47] 7676; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] 7677; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4 7678; GFX7LESS-NEXT: .LBB11_5: 7679; GFX7LESS-NEXT: s_endpgm 7680; 7681; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 7682; GFX9: ; %bb.0: 7683; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7684; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7685; GFX9-NEXT: s_mov_b32 s50, -1 7686; GFX9-NEXT: s_mov_b32 s51, 0xe00000 7687; GFX9-NEXT: s_add_u32 s48, s48, s11 7688; GFX9-NEXT: s_addc_u32 s49, s49, 0 7689; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] 7690; GFX9-NEXT: s_mov_b32 s43, s8 7691; GFX9-NEXT: s_add_u32 s8, s36, 44 7692; GFX9-NEXT: s_mov_b32 s42, s9 7693; GFX9-NEXT: s_addc_u32 s9, s37, 0 7694; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] 7695; GFX9-NEXT: s_getpc_b64 s[0:1] 7696; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 7697; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 7698; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7699; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 7700; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 7701; GFX9-NEXT: s_mov_b32 s33, s10 7702; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] 7703; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] 7704; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 7705; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] 7706; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 7707; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 7708; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 7709; GFX9-NEXT: s_mov_b32 s12, s43 7710; GFX9-NEXT: s_mov_b32 s13, s42 7711; GFX9-NEXT: s_mov_b32 s14, s33 7712; GFX9-NEXT: v_mov_b32_e32 v31, v40 7713; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 7714; GFX9-NEXT: s_movk_i32 s32, 0x800 7715; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7716; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 7717; GFX9-NEXT: v_mov_b32_e32 v2, 0 7718; GFX9-NEXT: s_mov_b64 s[0:1], exec 7719; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000 7720; GFX9-NEXT: .LBB11_1: ; %ComputeLoop 7721; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 
7722; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1] 7723; GFX9-NEXT: v_readlane_b32 s3, v1, s4 7724; GFX9-NEXT: v_readlane_b32 s2, v0, s4 7725; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 7726; GFX9-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 7727; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4 7728; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 7729; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 7730; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 7731; GFX9-NEXT: s_cbranch_scc1 .LBB11_1 7732; GFX9-NEXT: ; %bb.2: ; %ComputeEnd 7733; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7734; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 7735; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 7736; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 7737; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 7738; GFX9-NEXT: s_cbranch_execz .LBB11_5 7739; GFX9-NEXT: ; %bb.3: 7740; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 7741; GFX9-NEXT: v_mov_b32_e32 v0, 0 7742; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 7743; GFX9-NEXT: s_mov_b64 s[46:47], 0 7744; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7745; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 7746; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start 7747; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7748; GFX9-NEXT: s_waitcnt vmcnt(0) 7749; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 7750; GFX9-NEXT: s_add_u32 s8, s36, 44 7751; GFX9-NEXT: s_addc_u32 s9, s37, 0 7752; GFX9-NEXT: s_getpc_b64 s[0:1] 7753; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7754; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7755; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7756; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] 7757; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 7758; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 7759; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] 7760; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 7761; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] 7762; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] 7763; GFX9-NEXT: s_mov_b32 s12, s43 7764; 
GFX9-NEXT: s_mov_b32 s13, s42 7765; GFX9-NEXT: s_mov_b32 s14, s33 7766; GFX9-NEXT: v_mov_b32_e32 v31, v40 7767; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] 7768; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 7769; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 7770; GFX9-NEXT: v_mov_b32_e32 v0, 8 7771; GFX9-NEXT: v_mov_b32_e32 v1, 0 7772; GFX9-NEXT: v_mov_b32_e32 v2, s44 7773; GFX9-NEXT: v_mov_b32_e32 v3, s45 7774; GFX9-NEXT: v_mov_b32_e32 v4, 0 7775; GFX9-NEXT: v_mov_b32_e32 v5, 8 7776; GFX9-NEXT: v_mov_b32_e32 v6, 0 7777; GFX9-NEXT: v_mov_b32_e32 v7, 0 7778; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7779; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] 7780; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 7781; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 7782; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 7783; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7784; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7785; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] 7786; GFX9-NEXT: s_cbranch_execnz .LBB11_4 7787; GFX9-NEXT: .LBB11_5: 7788; GFX9-NEXT: s_endpgm 7789; 7790; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 7791; GFX1064: ; %bb.0: 7792; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7793; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7794; GFX1064-NEXT: s_mov_b32 s50, -1 7795; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 7796; GFX1064-NEXT: s_add_u32 s48, s48, s11 7797; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] 7798; GFX1064-NEXT: s_addc_u32 s49, s49, 0 7799; GFX1064-NEXT: s_mov_b32 s43, s8 7800; GFX1064-NEXT: s_add_u32 s8, s34, 44 7801; GFX1064-NEXT: s_mov_b32 s42, s9 7802; GFX1064-NEXT: s_addc_u32 s9, s35, 0 7803; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] 7804; GFX1064-NEXT: s_getpc_b64 s[0:1] 7805; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 7806; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 7807; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2 7808; GFX1064-NEXT: s_load_dwordx2 
s[16:17], s[0:1], 0x0 7809; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 7810; GFX1064-NEXT: s_mov_b32 s33, s10 7811; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] 7812; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] 7813; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] 7814; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 7815; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 7816; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 7817; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] 7818; GFX1064-NEXT: s_mov_b32 s12, s43 7819; GFX1064-NEXT: v_mov_b32_e32 v31, v40 7820; GFX1064-NEXT: s_mov_b32 s13, s42 7821; GFX1064-NEXT: s_mov_b32 s14, s33 7822; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 7823; GFX1064-NEXT: s_movk_i32 s32, 0x800 7824; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 7825; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 7826; GFX1064-NEXT: v_mov_b32_e32 v2, 0 7827; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000 7828; GFX1064-NEXT: s_mov_b64 s[0:1], exec 7829; GFX1064-NEXT: .LBB11_1: ; %ComputeLoop 7830; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 7831; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1] 7832; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 7833; GFX1064-NEXT: v_readlane_b32 s3, v1, s4 7834; GFX1064-NEXT: v_readlane_b32 s2, v0, s4 7835; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 7836; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4 7837; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] 7838; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0 7839; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 7840; GFX1064-NEXT: s_cbranch_scc1 .LBB11_1 7841; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd 7842; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7843; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 7844; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 7845; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 7846; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 7847; GFX1064-NEXT: s_cbranch_execz .LBB11_5 7848; GFX1064-NEXT: ; %bb.3: 7849; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 7850; GFX1064-NEXT: v_mov_b32_e32 v0, 0 7851; GFX1064-NEXT: 
v_max_f64 v[41:42], v[2:3], v[2:3] 7852; GFX1064-NEXT: s_mov_b64 s[46:47], 0 7853; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 7854; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 7855; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start 7856; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 7857; GFX1064-NEXT: s_waitcnt vmcnt(0) 7858; GFX1064-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 7859; GFX1064-NEXT: s_add_u32 s8, s34, 44 7860; GFX1064-NEXT: s_addc_u32 s9, s35, 0 7861; GFX1064-NEXT: s_getpc_b64 s[0:1] 7862; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7863; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7864; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 7865; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 7866; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7867; GFX1064-NEXT: v_mov_b32_e32 v31, v40 7868; GFX1064-NEXT: v_mov_b32_e32 v2, s44 7869; GFX1064-NEXT: v_mov_b32_e32 v3, s45 7870; GFX1064-NEXT: v_mov_b32_e32 v4, 0 7871; GFX1064-NEXT: v_mov_b32_e32 v5, 8 7872; GFX1064-NEXT: v_mov_b32_e32 v6, 0 7873; GFX1064-NEXT: v_mov_b32_e32 v7, 0 7874; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] 7875; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] 7876; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] 7877; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] 7878; GFX1064-NEXT: s_mov_b32 s12, s43 7879; GFX1064-NEXT: s_mov_b32 s13, s42 7880; GFX1064-NEXT: s_mov_b32 s14, s33 7881; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] 7882; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 7883; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 7884; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 7885; GFX1064-NEXT: v_mov_b32_e32 v0, 8 7886; GFX1064-NEXT: v_mov_b32_e32 v1, 0 7887; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 7888; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] 7889; GFX1064-NEXT: s_clause 0x1 7890; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 7891; GFX1064-NEXT: buffer_load_dword v5, off, 
s[48:51], 0 offset:4 7892; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 7893; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 7894; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 7895; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] 7896; GFX1064-NEXT: s_cbranch_execnz .LBB11_4 7897; GFX1064-NEXT: .LBB11_5: 7898; GFX1064-NEXT: s_endpgm 7899; 7900; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 7901; GFX1032: ; %bb.0: 7902; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 7903; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 7904; GFX1032-NEXT: s_mov_b32 s50, -1 7905; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 7906; GFX1032-NEXT: s_add_u32 s48, s48, s11 7907; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] 7908; GFX1032-NEXT: s_addc_u32 s49, s49, 0 7909; GFX1032-NEXT: s_mov_b32 s43, s8 7910; GFX1032-NEXT: s_add_u32 s8, s34, 44 7911; GFX1032-NEXT: s_mov_b32 s42, s9 7912; GFX1032-NEXT: s_addc_u32 s9, s35, 0 7913; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] 7914; GFX1032-NEXT: s_getpc_b64 s[0:1] 7915; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 7916; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 7917; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2 7918; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 7919; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 7920; GFX1032-NEXT: s_mov_b32 s33, s10 7921; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] 7922; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] 7923; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] 7924; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 7925; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 7926; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 7927; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] 7928; GFX1032-NEXT: s_mov_b32 s12, s43 7929; GFX1032-NEXT: v_mov_b32_e32 v31, v40 7930; GFX1032-NEXT: s_mov_b32 s13, s42 7931; GFX1032-NEXT: s_mov_b32 s14, s33 7932; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 7933; GFX1032-NEXT: s_movk_i32 s32, 0x400 7934; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 7935; GFX1032-NEXT: 
s_swappc_b64 s[30:31], s[16:17] 7936; GFX1032-NEXT: v_mov_b32_e32 v2, 0 7937; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000 7938; GFX1032-NEXT: s_mov_b32 s0, exec_lo 7939; GFX1032-NEXT: .LBB11_1: ; %ComputeLoop 7940; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 7941; GFX1032-NEXT: s_ff1_i32_b32 s1, s0 7942; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 7943; GFX1032-NEXT: v_readlane_b32 s3, v1, s1 7944; GFX1032-NEXT: v_readlane_b32 s2, v0, s1 7945; GFX1032-NEXT: s_lshl_b32 s1, 1, s1 7946; GFX1032-NEXT: s_andn2_b32 s0, s0, s1 7947; GFX1032-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 7948; GFX1032-NEXT: s_cmp_lg_u32 s0, 0 7949; GFX1032-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 7950; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1 7951; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd 7952; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 7953; GFX1032-NEXT: s_mov_b32 s46, 0 7954; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 7955; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 7956; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 7957; GFX1032-NEXT: s_cbranch_execz .LBB11_5 7958; GFX1032-NEXT: ; %bb.3: 7959; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 7960; GFX1032-NEXT: v_mov_b32_e32 v0, 0 7961; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 7962; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 7963; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] 7964; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start 7965; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 7966; GFX1032-NEXT: s_waitcnt vmcnt(0) 7967; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 7968; GFX1032-NEXT: s_add_u32 s8, s34, 44 7969; GFX1032-NEXT: s_addc_u32 s9, s35, 0 7970; GFX1032-NEXT: s_getpc_b64 s[0:1] 7971; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 7972; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 7973; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 7974; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 7975; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 
0x0 7976; GFX1032-NEXT: v_mov_b32_e32 v31, v40 7977; GFX1032-NEXT: v_mov_b32_e32 v2, s44 7978; GFX1032-NEXT: v_mov_b32_e32 v3, s45 7979; GFX1032-NEXT: v_mov_b32_e32 v4, 0 7980; GFX1032-NEXT: v_mov_b32_e32 v5, 8 7981; GFX1032-NEXT: v_mov_b32_e32 v6, 0 7982; GFX1032-NEXT: v_mov_b32_e32 v7, 0 7983; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] 7984; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] 7985; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] 7986; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] 7987; GFX1032-NEXT: s_mov_b32 s12, s43 7988; GFX1032-NEXT: s_mov_b32 s13, s42 7989; GFX1032-NEXT: s_mov_b32 s14, s33 7990; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] 7991; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 7992; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 7993; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 7994; GFX1032-NEXT: v_mov_b32_e32 v0, 8 7995; GFX1032-NEXT: v_mov_b32_e32 v1, 0 7996; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 7997; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] 7998; GFX1032-NEXT: s_clause 0x1 7999; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 8000; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 8001; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 8002; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 8003; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 8004; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 8005; GFX1032-NEXT: s_cbranch_execnz .LBB11_4 8006; GFX1032-NEXT: .LBB11_5: 8007; GFX1032-NEXT: s_endpgm 8008; 8009; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8010; GFX1164: ; %bb.0: 8011; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] 8012; GFX1164-NEXT: s_mov_b32 s43, s8 8013; GFX1164-NEXT: s_add_u32 s8, s34, 44 8014; GFX1164-NEXT: s_mov_b32 s42, s9 8015; GFX1164-NEXT: s_addc_u32 s9, s35, 0 8016; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] 8017; GFX1164-NEXT: s_getpc_b64 s[0:1] 8018; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8019; GFX1164-NEXT: s_addc_u32 s1, s1, 
div.double.value@gotpcrel32@hi+12 8020; GFX1164-NEXT: v_mov_b32_e32 v31, v0 8021; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8022; GFX1164-NEXT: s_mov_b32 s33, s10 8023; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] 8024; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 8025; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] 8026; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] 8027; GFX1164-NEXT: s_mov_b32 s12, s43 8028; GFX1164-NEXT: s_mov_b32 s13, s42 8029; GFX1164-NEXT: s_mov_b32 s14, s33 8030; GFX1164-NEXT: s_mov_b32 s32, 32 8031; GFX1164-NEXT: v_mov_b32_e32 v40, v0 8032; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] 8033; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 8034; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 8035; GFX1164-NEXT: v_mov_b32_e32 v2, 0 8036; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000 8037; GFX1164-NEXT: s_mov_b64 s[0:1], exec 8038; GFX1164-NEXT: .LBB11_1: ; %ComputeLoop 8039; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 8040; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8041; GFX1164-NEXT: s_ctz_i32_b64 s4, s[0:1] 8042; GFX1164-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 8043; GFX1164-NEXT: v_readlane_b32 s3, v1, s4 8044; GFX1164-NEXT: v_readlane_b32 s2, v0, s4 8045; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 8046; GFX1164-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 8047; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4 8048; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3] 8049; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8050; GFX1164-NEXT: s_cmp_lg_u64 s[0:1], 0 8051; GFX1164-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 8052; GFX1164-NEXT: s_cbranch_scc1 .LBB11_1 8053; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd 8054; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 8055; GFX1164-NEXT: s_mov_b64 s[0:1], exec 8056; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8057; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 8058; GFX1164-NEXT: 
v_cmpx_eq_u32_e32 0, v0 8059; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 8060; GFX1164-NEXT: s_cbranch_execz .LBB11_5 8061; GFX1164-NEXT: ; %bb.3: 8062; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 8063; GFX1164-NEXT: v_mov_b32_e32 v0, 0 8064; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 8065; GFX1164-NEXT: s_mov_b64 s[46:47], 0 8066; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 8067; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] 8068; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 8069; GFX1164-NEXT: .p2align 6 8070; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start 8071; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1 8072; GFX1164-NEXT: s_waitcnt vmcnt(0) 8073; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 8074; GFX1164-NEXT: s_add_u32 s8, s34, 44 8075; GFX1164-NEXT: s_addc_u32 s9, s35, 0 8076; GFX1164-NEXT: s_getpc_b64 s[0:1] 8077; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8078; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8079; GFX1164-NEXT: v_mov_b32_e32 v31, v40 8080; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8081; GFX1164-NEXT: v_mov_b32_e32 v2, s44 8082; GFX1164-NEXT: v_mov_b32_e32 v3, s45 8083; GFX1164-NEXT: v_mov_b32_e32 v6, 0 8084; GFX1164-NEXT: v_mov_b32_e32 v7, 0 8085; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] 8086; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] 8087; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] 8088; GFX1164-NEXT: s_mov_b32 s12, s43 8089; GFX1164-NEXT: s_mov_b32 s13, s42 8090; GFX1164-NEXT: s_mov_b32 s14, s33 8091; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 8092; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off 8093; GFX1164-NEXT: v_mov_b32_e32 v4, 0 8094; GFX1164-NEXT: v_mov_b32_e32 v5, 8 8095; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8 8096; GFX1164-NEXT: v_mov_b32_e32 v0, 8 8097; GFX1164-NEXT: v_mov_b32_e32 v1, 0 8098; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 8099; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] 8100; GFX1164-NEXT: scratch_load_b64 v[4:5], off, 
off 8101; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 8102; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 8103; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 8104; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 8105; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] 8106; GFX1164-NEXT: s_cbranch_execnz .LBB11_4 8107; GFX1164-NEXT: .LBB11_5: 8108; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 8109; GFX1164-NEXT: s_endpgm 8110; 8111; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8112; GFX1132: ; %bb.0: 8113; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] 8114; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] 8115; GFX1132-NEXT: s_add_u32 s8, s34, 44 8116; GFX1132-NEXT: s_addc_u32 s9, s35, 0 8117; GFX1132-NEXT: s_getpc_b64 s[0:1] 8118; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8119; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8120; GFX1132-NEXT: v_mov_b32_e32 v31, v0 8121; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8122; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] 8123; GFX1132-NEXT: s_mov_b32 s42, s14 8124; GFX1132-NEXT: s_mov_b32 s43, s13 8125; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 8126; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] 8127; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] 8128; GFX1132-NEXT: s_mov_b32 s12, s13 8129; GFX1132-NEXT: s_mov_b32 s13, s14 8130; GFX1132-NEXT: s_mov_b32 s14, s15 8131; GFX1132-NEXT: s_mov_b32 s32, 32 8132; GFX1132-NEXT: s_mov_b32 s33, s15 8133; GFX1132-NEXT: v_mov_b32_e32 v40, v0 8134; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] 8135; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 8136; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] 8137; GFX1132-NEXT: v_mov_b32_e32 v2, 0 8138; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000 8139; GFX1132-NEXT: s_mov_b32 s0, exec_lo 8140; GFX1132-NEXT: .LBB11_1: ; %ComputeLoop 8141; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 8142; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
8143; GFX1132-NEXT: s_ctz_i32_b32 s1, s0 8144; GFX1132-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 8145; GFX1132-NEXT: v_readlane_b32 s3, v1, s1 8146; GFX1132-NEXT: v_readlane_b32 s2, v0, s1 8147; GFX1132-NEXT: s_lshl_b32 s1, 1, s1 8148; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8149; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1 8150; GFX1132-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3] 8151; GFX1132-NEXT: s_cmp_lg_u32 s0, 0 8152; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 8153; GFX1132-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] 8154; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1 8155; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd 8156; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 8157; GFX1132-NEXT: s_mov_b32 s46, 0 8158; GFX1132-NEXT: s_mov_b32 s0, exec_lo 8159; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 8160; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 8161; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 8162; GFX1132-NEXT: s_cbranch_execz .LBB11_5 8163; GFX1132-NEXT: ; %bb.3: 8164; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 8165; GFX1132-NEXT: v_mov_b32_e32 v0, 0 8166; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) 8167; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] 8168; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 8169; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] 8170; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 8171; GFX1132-NEXT: .p2align 6 8172; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start 8173; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1 8174; GFX1132-NEXT: s_waitcnt vmcnt(0) 8175; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5] 8176; GFX1132-NEXT: s_add_u32 s8, s34, 44 8177; GFX1132-NEXT: s_addc_u32 s9, s35, 0 8178; GFX1132-NEXT: s_getpc_b64 s[0:1] 8179; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8180; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8181; GFX1132-NEXT: v_mov_b32_e32 v31, v40 8182; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8183; GFX1132-NEXT: v_mov_b32_e32 v3, 
s45 8184; GFX1132-NEXT: v_mov_b32_e32 v7, 0 8185; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] 8186; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] 8187; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] 8188; GFX1132-NEXT: s_mov_b32 s12, s43 8189; GFX1132-NEXT: s_mov_b32 s13, s42 8190; GFX1132-NEXT: s_mov_b32 s14, s33 8191; GFX1132-NEXT: v_mov_b32_e32 v6, 0 8192; GFX1132-NEXT: v_mov_b32_e32 v2, s44 8193; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] 8194; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off 8195; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 8196; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8 8197; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0 8198; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 8199; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] 8200; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off 8201; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 8202; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 8203; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 8204; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 8205; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 8206; GFX1132-NEXT: s_cbranch_execnz .LBB11_4 8207; GFX1132-NEXT: .LBB11_5: 8208; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 8209; GFX1132-NEXT: s_endpgm 8210; 8211; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8212; GFX7LESS-DPP: ; %bb.0: 8213; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 8214; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 8215; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 8216; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 8217; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 8218; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 8219; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 8220; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 8221; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 8222; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 8223; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 8224; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], 
s[4:5] 8225; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8226; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 8227; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 8228; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 8229; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 8230; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 8231; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 8232; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 8233; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8234; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8235; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8236; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 8237; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 8238; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 8239; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 8240; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8241; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8242; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 8243; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 8244; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 8245; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 8246; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 8247; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8248; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8249; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 8250; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8251; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 8252; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 8253; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] 8254; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start 8255; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8256; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 8257; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 8258; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 8259; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 8260; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 8261; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], 
v[41:42] 8262; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 8263; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] 8264; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8265; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8266; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 8267; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 8268; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8269; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 8270; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 8271; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0 8272; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0 8273; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 8274; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 8275; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 8276; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8277; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8278; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 8279; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 8280; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 8281; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 8282; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 8283; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8284; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8285; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 8286; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 8287; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 8288; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8289; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8290; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 8291; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 8292; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 8293; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 8294; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 8295; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1 8296; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end 8297; GFX7LESS-DPP-NEXT: s_endpgm 8298; 8299; GFX9-DPP-LABEL: 
global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8300; GFX9-DPP: ; %bb.0: 8301; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 8302; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 8303; GFX9-DPP-NEXT: s_mov_b32 s54, -1 8304; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 8305; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 8306; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 8307; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] 8308; GFX9-DPP-NEXT: s_mov_b32 s43, s8 8309; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 8310; GFX9-DPP-NEXT: s_mov_b32 s42, s9 8311; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 8312; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 8313; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 8314; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8315; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8316; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8317; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 8318; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 8319; GFX9-DPP-NEXT: s_mov_b32 s33, s10 8320; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] 8321; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8322; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 8323; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8324; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] 8325; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8326; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 8327; GFX9-DPP-NEXT: s_mov_b32 s12, s43 8328; GFX9-DPP-NEXT: s_mov_b32 s13, s42 8329; GFX9-DPP-NEXT: s_mov_b32 s14, s33 8330; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 8331; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] 8332; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 8333; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 8334; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8335; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 8336; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 8337; GFX9-DPP-NEXT: v_cndmask_b32_e64 v11, v9, v1, s[0:1] 8338; GFX9-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 8339; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 8340; GFX9-DPP-NEXT: 
v_mov_b32_e32 v12, 0 8341; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0 8342; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf 8343; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf 8344; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8345; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 8346; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 8347; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 8348; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 8349; GFX9-DPP-NEXT: s_nop 0 8350; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf 8351; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf 8352; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 8353; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 8354; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 8355; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 8356; GFX9-DPP-NEXT: s_nop 0 8357; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf 8358; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf 8359; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 8360; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 8361; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 8362; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 8363; GFX9-DPP-NEXT: s_nop 0 8364; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf 8365; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf 8366; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 8367; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13] 8368; GFX9-DPP-NEXT: v_mov_b32_e32 v13, 0x7ff80000 8369; GFX9-DPP-NEXT: v_mov_b32_e32 v12, 0 8370; GFX9-DPP-NEXT: s_nop 0 8371; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf 8372; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf 8373; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 8374; GFX9-DPP-NEXT: v_max_f64 
v[10:11], v[10:11], v[12:13] 8375; GFX9-DPP-NEXT: s_nop 1 8376; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf 8377; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf 8378; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 8379; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8380; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 8381; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 8382; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 8383; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 8384; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 8385; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 8386; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 8387; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] 8388; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 8389; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 8390; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3 8391; GFX9-DPP-NEXT: ; %bb.1: 8392; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 8393; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 8394; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 8395; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] 8396; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start 8397; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8398; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] 8399; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 8400; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] 8401; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 8402; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 8403; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] 8404; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8405; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8406; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8407; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] 8408; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 8409; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 8410; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8411; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 8412; 
GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8413; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] 8414; GFX9-DPP-NEXT: s_mov_b32 s12, s43 8415; GFX9-DPP-NEXT: s_mov_b32 s13, s42 8416; GFX9-DPP-NEXT: s_mov_b32 s14, s33 8417; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 8418; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 8419; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 8420; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] 8421; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 8422; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 8423; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 8424; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 8425; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 8426; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 8427; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 8428; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 8429; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 8430; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8431; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 8432; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 8433; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8434; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 8435; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] 8436; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] 8437; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2 8438; GFX9-DPP-NEXT: .LBB11_3: 8439; GFX9-DPP-NEXT: s_endpgm 8440; 8441; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8442; GFX1064-DPP: ; %bb.0: 8443; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 8444; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 8445; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 8446; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 8447; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 8448; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 8449; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 8450; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 8451; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 8452; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 8453; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 8454; GFX1064-DPP-NEXT: 
s_mov_b64 s[40:41], s[0:1] 8455; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 8456; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8457; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8458; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 8459; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8460; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 8461; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 8462; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 8463; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8464; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8465; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 8466; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8467; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8468; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8469; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 8470; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 8471; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 8472; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 8473; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8474; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 8475; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 8476; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8477; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 8478; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 8479; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0 8480; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1] 8481; GFX1064-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 8482; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 8483; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 8484; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8485; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 8486; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8487; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8488; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 8489; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 8490; GFX1064-DPP-NEXT: 
v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 8491; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8492; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8493; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8494; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 8495; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 8496; GFX1064-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 8497; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8498; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8499; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8500; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, 0 8501; GFX1064-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 8502; GFX1064-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 8503; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8504; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8505; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 8506; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 8507; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8508; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8509; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0 8510; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32 8511; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32 8512; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0 8513; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5] 8514; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3] 8515; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8516; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1] 8517; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 8518; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8 8519; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9 8520; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 8521; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 8522; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 8523; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 8524; 
GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3 8525; GFX1064-DPP-NEXT: ; %bb.1: 8526; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 8527; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 8528; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 8529; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] 8530; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start 8531; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8532; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] 8533; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) 8534; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] 8535; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 8536; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 8537; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] 8538; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8539; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8540; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 8541; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 8542; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8543; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 8544; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 8545; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 8546; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 8547; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 8548; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8549; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8550; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8551; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8552; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 8553; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 8554; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 8555; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8556; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] 8557; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 8558; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 8559; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 8560; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 8561; GFX1064-DPP-NEXT: 
v_mov_b32_e32 v3, s45 8562; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 8563; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 8564; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8565; GFX1064-DPP-NEXT: s_clause 0x1 8566; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 8567; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 8568; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8569; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 8570; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 8571; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] 8572; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2 8573; GFX1064-DPP-NEXT: .LBB11_3: 8574; GFX1064-DPP-NEXT: s_endpgm 8575; 8576; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8577; GFX1032-DPP: ; %bb.0: 8578; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 8579; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 8580; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 8581; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 8582; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 8583; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 8584; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 8585; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 8586; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 8587; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 8588; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 8589; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 8590; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 8591; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8592; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8593; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 8594; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8595; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 8596; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 8597; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 8598; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8599; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8600; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 8601; 
GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8602; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8603; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8604; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 8605; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 8606; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 8607; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 8608; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8609; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 8610; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 8611; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8612; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 8613; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 8614; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0 8615; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0 8616; GFX1032-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 8617; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 8618; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 8619; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8620; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 8621; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8622; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8623; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 8624; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 8625; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 8626; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8627; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8628; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8629; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 8630; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 8631; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 8632; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8633; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8634; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8635; 
GFX1032-DPP-NEXT: v_mov_b32_e32 v10, 0 8636; GFX1032-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 8637; GFX1032-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 8638; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8639; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8640; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 8641; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 8642; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8643; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8644; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 8645; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 8646; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 8647; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 8648; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 8649; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 8650; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 8651; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 8652; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 8653; GFX1032-DPP-NEXT: ; %bb.1: 8654; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 8655; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 8656; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 8657; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] 8658; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start 8659; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8660; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) 8661; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 8662; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 8663; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 8664; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] 8665; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8666; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8667; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 8668; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 8669; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 8670; 
GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 8671; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 8672; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 8673; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 8674; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 8675; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 8676; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 8677; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] 8678; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8679; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8680; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8681; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 8682; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 8683; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 8684; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] 8685; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 8686; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 8687; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 8688; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 8689; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 8690; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 8691; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] 8692; GFX1032-DPP-NEXT: s_clause 0x1 8693; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 8694; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 8695; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8696; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 8697; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 8698; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 8699; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2 8700; GFX1032-DPP-NEXT: .LBB11_3: 8701; GFX1032-DPP-NEXT: s_endpgm 8702; 8703; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8704; GFX1164-DPP: ; %bb.0: 8705; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 8706; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 8707; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 8708; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 8709; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 8710; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 
8711; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 8712; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8713; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 8714; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0 8715; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8716; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 8717; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 8718; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8719; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8720; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8721; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 8722; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 8723; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 8724; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 8725; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 8726; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8727; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 8728; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 8729; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 8730; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000 8731; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0 8732; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s[0:1] 8733; GFX1164-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[0:1] 8734; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8735; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 8736; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 8737; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8738; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8739; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 8740; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8741; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8742; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 8743; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8744; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf 
bank_mask:0xf 8745; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 8746; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8747; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8748; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8749; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8750; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 8751; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8752; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 8753; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 8754; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8755; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8756; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8757; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, 0x7ff80000 8758; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0 8759; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8760; GFX1164-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 8761; GFX1164-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 8762; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8763; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8764; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8765; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8766; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 8767; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 8768; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8769; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8770; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8771; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 
8772; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9 8773; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8 8774; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8775; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8776; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8777; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1] 8778; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8779; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 8780; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8 8781; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 8782; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9 8783; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 8784; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v0 8785; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 8786; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 8787; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 8788; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3 8789; GFX1164-DPP-NEXT: ; %bb.1: 8790; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 8791; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 8792; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 8793; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 8794; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] 8795; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 8796; GFX1164-DPP-NEXT: .p2align 6 8797; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start 8798; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8799; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) 8800; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 8801; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 8802; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 8803; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] 8804; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8805; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8806; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 8807; GFX1164-DPP-NEXT: 
s_load_b64 s[0:1], s[0:1], 0x0 8808; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 8809; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 8810; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 8811; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 8812; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8813; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8814; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8815; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 8816; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 8817; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 8818; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 8819; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off 8820; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 8821; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 8822; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 8823; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 8824; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 8825; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 8826; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 8827; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off 8828; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8829; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 8830; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 8831; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] 8832; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] 8833; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2 8834; GFX1164-DPP-NEXT: .LBB11_3: 8835; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 8836; GFX1164-DPP-NEXT: s_endpgm 8837; 8838; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: 8839; GFX1132-DPP: ; %bb.0: 8840; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] 8841; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] 8842; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 8843; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 8844; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 8845; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 8846; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, 
div.double.value@gotpcrel32@hi+12 8847; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 8848; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8849; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] 8850; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 8851; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 8852; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8853; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] 8854; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8855; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 8856; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 8857; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 8858; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 8859; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 8860; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 8861; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] 8862; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 8863; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 8864; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 8865; GFX1132-DPP-NEXT: v_dual_mov_b32 v9, 0x7ff80000 :: v_dual_mov_b32 v8, 0 8866; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v1, s0 8867; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 8868; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8869; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:1 row_mask:0xf bank_mask:0xf 8870; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:1 row_mask:0xf bank_mask:0xf 8871; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8872; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8873; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 8874; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9] 8875; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 8876; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8877; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:2 row_mask:0xf bank_mask:0xf 8878; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:2 row_mask:0xf bank_mask:0xf 8879; GFX1132-DPP-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8880; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8881; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8882; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 8883; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8884; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:4 row_mask:0xf bank_mask:0xf 8885; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:4 row_mask:0xf bank_mask:0xf 8886; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8887; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8888; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8889; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, 0x7ff80000 :: v_dual_mov_b32 v10, 0 8890; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8891; GFX1132-DPP-NEXT: v_mov_b32_dpp v11, v9 row_xmask:8 row_mask:0xf bank_mask:0xf 8892; GFX1132-DPP-NEXT: v_mov_b32_dpp v10, v8 row_xmask:8 row_mask:0xf bank_mask:0xf 8893; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8894; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8895; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8896; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8897; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v9, 0, 0 8898; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 8899; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 8900; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 8901; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] 8902; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0 8903; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 8904; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 8905; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 
v1, exec_lo, 0 8906; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 8907; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 8908; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 8909; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) 8910; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 8911; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3 8912; GFX1132-DPP-NEXT: ; %bb.1: 8913; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 8914; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] 8915; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 8916; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] 8917; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 8918; GFX1132-DPP-NEXT: .p2align 6 8919; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start 8920; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 8921; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) 8922; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2] 8923; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 8924; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 8925; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] 8926; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 8927; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 8928; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 8929; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 8930; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 8931; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 8932; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] 8933; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] 8934; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] 8935; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 8936; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 8937; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 8938; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) 8939; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] 8940; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off 8941; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 8942; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off 
; Doc note for kernel @global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory,
; whose `define` appears further down in this line-wrapped chunk, right after the closing `}` of the preceding kernel.
; IR under test -- a single `atomicrmw fmax` of the constant float 4.0 on `ptr addrspace(1) %ptr`, monotonic, align 4,
; carrying the !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode attachments.
; Expected output per check prefix (each Iterative prefix and its -DPP twin list the same instructions for this kernel)
;   * every prefix -- v_mbcnt_* plus a compare against 0 and s_cbranch_execz guard the atomic
;   * GFX7LESS, GFX9 -- an %atomicrmw.start loop (.LBB12_2) around buffer_atomic_cmpswap / global_atomic_cmpswap
;   * GFX1064, GFX1032 -- one global_atomic_fmax
;   * GFX1164, GFX1132 -- one global_atomic_max_f32
; The assertion lines are produced by utils/update_llc_test_checks.py (see the note at the top of the file),
; so regenerate them with that script instead of editing them by hand.
offset:8 8943; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 8944; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 8945; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] 8946; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off 8947; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 8948; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 8949; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 8950; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 8951; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 8952; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2 8953; GFX1132-DPP-NEXT: .LBB11_3: 8954; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 8955; GFX1132-DPP-NEXT: s_endpgm 8956 %divValue = call double @div.double.value() 8957 %result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue monotonic, align 4, !amdgpu.no.fine.grained.memory !1 8958 ret void 8959} 8960 8961define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) { 8962; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 8963; GFX7LESS: ; %bb.0: 8964; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 8965; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 8966; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 8967; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 8968; GFX7LESS-NEXT: s_cbranch_execz .LBB12_3 8969; GFX7LESS-NEXT: ; %bb.1: 8970; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 8971; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 8972; GFX7LESS-NEXT: s_load_dword s2, s[0:1], 0x0 8973; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 8974; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 8975; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 8976; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 8977; GFX7LESS-NEXT: s_mov_b32 s2, -1 8978; GFX7LESS-NEXT: .LBB12_2: ; 
%atomicrmw.start 8979; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 8980; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 8981; GFX7LESS-NEXT: v_max_f32_e32 v0, 4.0, v0 8982; GFX7LESS-NEXT: s_waitcnt expcnt(0) 8983; GFX7LESS-NEXT: v_mov_b32_e32 v3, v1 8984; GFX7LESS-NEXT: v_mov_b32_e32 v2, v0 8985; GFX7LESS-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 8986; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 8987; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 8988; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8989; GFX7LESS-NEXT: v_mov_b32_e32 v1, v2 8990; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 8991; GFX7LESS-NEXT: s_cbranch_execnz .LBB12_2 8992; GFX7LESS-NEXT: .LBB12_3: 8993; GFX7LESS-NEXT: s_endpgm 8994; 8995; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 8996; GFX9: ; %bb.0: 8997; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 8998; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 8999; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9000; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 9001; GFX9-NEXT: s_cbranch_execz .LBB12_3 9002; GFX9-NEXT: ; %bb.1: 9003; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9004; GFX9-NEXT: s_mov_b64 s[2:3], 0 9005; GFX9-NEXT: v_mov_b32_e32 v2, 0 9006; GFX9-NEXT: s_waitcnt lgkmcnt(0) 9007; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 9008; GFX9-NEXT: s_waitcnt lgkmcnt(0) 9009; GFX9-NEXT: v_mov_b32_e32 v1, s4 9010; GFX9-NEXT: .LBB12_2: ; %atomicrmw.start 9011; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 9012; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 9013; GFX9-NEXT: v_max_f32_e32 v0, 4.0, v0 9014; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 9015; GFX9-NEXT: s_waitcnt vmcnt(0) 9016; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 9017; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 9018; GFX9-NEXT: v_mov_b32_e32 v1, v0 9019; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 9020; GFX9-NEXT: s_cbranch_execnz .LBB12_2 9021; GFX9-NEXT: .LBB12_3: 9022; GFX9-NEXT: 
s_endpgm 9023; 9024; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9025; GFX1064: ; %bb.0: 9026; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9027; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9028; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9029; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 9030; GFX1064-NEXT: s_cbranch_execz .LBB12_2 9031; GFX1064-NEXT: ; %bb.1: 9032; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9033; GFX1064-NEXT: v_mov_b32_e32 v0, 0 9034; GFX1064-NEXT: v_mov_b32_e32 v1, 4.0 9035; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 9036; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 9037; GFX1064-NEXT: .LBB12_2: 9038; GFX1064-NEXT: s_endpgm 9039; 9040; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9041; GFX1032: ; %bb.0: 9042; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9043; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 9044; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 9045; GFX1032-NEXT: s_cbranch_execz .LBB12_2 9046; GFX1032-NEXT: ; %bb.1: 9047; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9048; GFX1032-NEXT: v_mov_b32_e32 v0, 0 9049; GFX1032-NEXT: v_mov_b32_e32 v1, 4.0 9050; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 9051; GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 9052; GFX1032-NEXT: .LBB12_2: 9053; GFX1032-NEXT: s_endpgm 9054; 9055; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9056; GFX1164: ; %bb.0: 9057; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9058; GFX1164-NEXT: s_mov_b64 s[0:1], exec 9059; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 9060; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9061; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 9062; GFX1164-NEXT: s_cbranch_execz 
.LBB12_2 9063; GFX1164-NEXT: ; %bb.1: 9064; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9065; GFX1164-NEXT: v_mov_b32_e32 v0, 0 9066; GFX1164-NEXT: v_mov_b32_e32 v1, 4.0 9067; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 9068; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9069; GFX1164-NEXT: .LBB12_2: 9070; GFX1164-NEXT: s_endpgm 9071; 9072; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9073; GFX1132: ; %bb.0: 9074; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9075; GFX1132-NEXT: s_mov_b32 s0, exec_lo 9076; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 9077; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 9078; GFX1132-NEXT: s_cbranch_execz .LBB12_2 9079; GFX1132-NEXT: ; %bb.1: 9080; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9081; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 9082; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 9083; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9084; GFX1132-NEXT: .LBB12_2: 9085; GFX1132-NEXT: s_endpgm 9086; 9087; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9088; GFX7LESS-DPP: ; %bb.0: 9089; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 9090; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 9091; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9092; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9093; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB12_3 9094; GFX7LESS-DPP-NEXT: ; %bb.1: 9095; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 9096; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 9097; GFX7LESS-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 9098; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 9099; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 9100; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 9101; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s2 9102; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 9103; GFX7LESS-DPP-NEXT: 
.LBB12_2: ; %atomicrmw.start 9104; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 9105; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 9106; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 9107; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 9108; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 9109; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v0 9110; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 9111; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 9112; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 9113; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 9114; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v2 9115; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 9116; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB12_2 9117; GFX7LESS-DPP-NEXT: .LBB12_3: 9118; GFX7LESS-DPP-NEXT: s_endpgm 9119; 9120; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9121; GFX9-DPP: ; %bb.0: 9122; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9123; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9124; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9125; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9126; GFX9-DPP-NEXT: s_cbranch_execz .LBB12_3 9127; GFX9-DPP-NEXT: ; %bb.1: 9128; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9129; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 9130; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 9131; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 9132; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 9133; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 9134; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 9135; GFX9-DPP-NEXT: .LBB12_2: ; %atomicrmw.start 9136; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 9137; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 9138; GFX9-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 9139; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 9140; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 9141; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 9142; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 9143; 
GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 9144; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 9145; GFX9-DPP-NEXT: s_cbranch_execnz .LBB12_2 9146; GFX9-DPP-NEXT: .LBB12_3: 9147; GFX9-DPP-NEXT: s_endpgm 9148; 9149; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9150; GFX1064-DPP: ; %bb.0: 9151; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9152; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9153; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9154; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9155; GFX1064-DPP-NEXT: s_cbranch_execz .LBB12_2 9156; GFX1064-DPP-NEXT: ; %bb.1: 9157; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9158; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 9159; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9160; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 9161; GFX1064-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 9162; GFX1064-DPP-NEXT: .LBB12_2: 9163; GFX1064-DPP-NEXT: s_endpgm 9164; 9165; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9166; GFX1032-DPP: ; %bb.0: 9167; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9168; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 9169; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 9170; GFX1032-DPP-NEXT: s_cbranch_execz .LBB12_2 9171; GFX1032-DPP-NEXT: ; %bb.1: 9172; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9173; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 9174; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9175; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 9176; GFX1032-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 9177; GFX1032-DPP-NEXT: .LBB12_2: 9178; GFX1032-DPP-NEXT: s_endpgm 9179; 9180; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9181; GFX1164-DPP: ; %bb.0: 9182; 
GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9183; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 9184; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 9185; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9186; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 9187; GFX1164-DPP-NEXT: s_cbranch_execz .LBB12_2 9188; GFX1164-DPP-NEXT: ; %bb.1: 9189; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9190; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 9191; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9192; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 9193; GFX1164-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9194; GFX1164-DPP-NEXT: .LBB12_2: 9195; GFX1164-DPP-NEXT: s_endpgm 9196; 9197; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9198; GFX1132-DPP: ; %bb.0: 9199; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9200; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 9201; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 9202; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 9203; GFX1132-DPP-NEXT: s_cbranch_execz .LBB12_2 9204; GFX1132-DPP-NEXT: ; %bb.1: 9205; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9206; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 9207; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 9208; GFX1132-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9209; GFX1132-DPP-NEXT: .LBB12_2: 9210; GFX1132-DPP-NEXT: s_endpgm 9211 %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1, !amdgpu.ignore.denormal.mode !1 9212 ret void 9213} 9214 9215define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) { 9216; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9217; 
GFX7LESS: ; %bb.0: 9218; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 9219; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 9220; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9221; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc 9222; GFX7LESS-NEXT: s_cbranch_execz .LBB13_3 9223; GFX7LESS-NEXT: ; %bb.1: 9224; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 9225; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 9226; GFX7LESS-NEXT: s_load_dword s2, s[0:1], 0x0 9227; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 9228; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 9229; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) 9230; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 9231; GFX7LESS-NEXT: s_mov_b32 s2, -1 9232; GFX7LESS-NEXT: .LBB13_2: ; %atomicrmw.start 9233; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 9234; GFX7LESS-NEXT: v_mul_f32_e32 v0, 1.0, v1 9235; GFX7LESS-NEXT: v_max_f32_e32 v0, 4.0, v0 9236; GFX7LESS-NEXT: s_waitcnt expcnt(0) 9237; GFX7LESS-NEXT: v_mov_b32_e32 v3, v1 9238; GFX7LESS-NEXT: v_mov_b32_e32 v2, v0 9239; GFX7LESS-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 9240; GFX7LESS-NEXT: s_waitcnt vmcnt(0) 9241; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 9242; GFX7LESS-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 9243; GFX7LESS-NEXT: v_mov_b32_e32 v1, v2 9244; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[4:5] 9245; GFX7LESS-NEXT: s_cbranch_execnz .LBB13_2 9246; GFX7LESS-NEXT: .LBB13_3: 9247; GFX7LESS-NEXT: s_endpgm 9248; 9249; GFX9-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9250; GFX9: ; %bb.0: 9251; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9252; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9253; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9254; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc 9255; GFX9-NEXT: s_cbranch_execz .LBB13_3 9256; GFX9-NEXT: ; %bb.1: 9257; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9258; GFX9-NEXT: s_mov_b64 s[2:3], 0 9259; GFX9-NEXT: v_mov_b32_e32 v2, 0 9260; GFX9-NEXT: s_waitcnt lgkmcnt(0) 9261; 
GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 9262; GFX9-NEXT: s_waitcnt lgkmcnt(0) 9263; GFX9-NEXT: v_mov_b32_e32 v1, s4 9264; GFX9-NEXT: .LBB13_2: ; %atomicrmw.start 9265; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 9266; GFX9-NEXT: v_max_f32_e32 v0, v1, v1 9267; GFX9-NEXT: v_max_f32_e32 v0, 4.0, v0 9268; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 9269; GFX9-NEXT: s_waitcnt vmcnt(0) 9270; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 9271; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 9272; GFX9-NEXT: v_mov_b32_e32 v1, v0 9273; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] 9274; GFX9-NEXT: s_cbranch_execnz .LBB13_2 9275; GFX9-NEXT: .LBB13_3: 9276; GFX9-NEXT: s_endpgm 9277; 9278; GFX1064-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9279; GFX1064: ; %bb.0: 9280; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9281; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9282; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9283; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc 9284; GFX1064-NEXT: s_cbranch_execz .LBB13_2 9285; GFX1064-NEXT: ; %bb.1: 9286; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9287; GFX1064-NEXT: v_mov_b32_e32 v0, 0 9288; GFX1064-NEXT: v_mov_b32_e32 v1, 4.0 9289; GFX1064-NEXT: s_waitcnt lgkmcnt(0) 9290; GFX1064-NEXT: global_atomic_fmax v0, v1, s[0:1] 9291; GFX1064-NEXT: .LBB13_2: 9292; GFX1064-NEXT: s_endpgm 9293; 9294; GFX1032-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9295; GFX1032: ; %bb.0: 9296; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9297; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 9298; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo 9299; GFX1032-NEXT: s_cbranch_execz .LBB13_2 9300; GFX1032-NEXT: ; %bb.1: 9301; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9302; GFX1032-NEXT: v_mov_b32_e32 v0, 0 9303; GFX1032-NEXT: v_mov_b32_e32 v1, 4.0 9304; GFX1032-NEXT: s_waitcnt lgkmcnt(0) 9305; 
GFX1032-NEXT: global_atomic_fmax v0, v1, s[0:1] 9306; GFX1032-NEXT: .LBB13_2: 9307; GFX1032-NEXT: s_endpgm 9308; 9309; GFX1164-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9310; GFX1164: ; %bb.0: 9311; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9312; GFX1164-NEXT: s_mov_b64 s[0:1], exec 9313; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 9314; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9315; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 9316; GFX1164-NEXT: s_cbranch_execz .LBB13_2 9317; GFX1164-NEXT: ; %bb.1: 9318; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9319; GFX1164-NEXT: v_mov_b32_e32 v0, 0 9320; GFX1164-NEXT: v_mov_b32_e32 v1, 4.0 9321; GFX1164-NEXT: s_waitcnt lgkmcnt(0) 9322; GFX1164-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9323; GFX1164-NEXT: .LBB13_2: 9324; GFX1164-NEXT: s_endpgm 9325; 9326; GFX1132-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9327; GFX1132: ; %bb.0: 9328; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9329; GFX1132-NEXT: s_mov_b32 s0, exec_lo 9330; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) 9331; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 9332; GFX1132-NEXT: s_cbranch_execz .LBB13_2 9333; GFX1132-NEXT: ; %bb.1: 9334; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9335; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 9336; GFX1132-NEXT: s_waitcnt lgkmcnt(0) 9337; GFX1132-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9338; GFX1132-NEXT: .LBB13_2: 9339; GFX1132-NEXT: s_endpgm 9340; 9341; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9342; GFX7LESS-DPP: ; %bb.0: 9343; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 9344; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0 9345; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9346; 
GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9347; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB13_3 9348; GFX7LESS-DPP-NEXT: ; %bb.1: 9349; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 9350; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 9351; GFX7LESS-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 9352; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], 0 9353; GFX7LESS-DPP-NEXT: s_mov_b32 s3, 0xf000 9354; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) 9355; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s2 9356; GFX7LESS-DPP-NEXT: s_mov_b32 s2, -1 9357; GFX7LESS-DPP-NEXT: .LBB13_2: ; %atomicrmw.start 9358; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 9359; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v0, 1.0, v1 9360; GFX7LESS-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 9361; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) 9362; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 9363; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, v0 9364; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[2:3], off, s[0:3], 0 glc 9365; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) 9366; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 9367; GFX7LESS-DPP-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 9368; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, v2 9369; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[4:5] 9370; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB13_2 9371; GFX7LESS-DPP-NEXT: .LBB13_3: 9372; GFX7LESS-DPP-NEXT: s_endpgm 9373; 9374; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9375; GFX9-DPP: ; %bb.0: 9376; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9377; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9378; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9379; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9380; GFX9-DPP-NEXT: s_cbranch_execz .LBB13_3 9381; GFX9-DPP-NEXT: ; %bb.1: 9382; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9383; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 9384; GFX9-DPP-NEXT: v_mov_b32_e32 v2, 0 9385; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 9386; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 
0x0 9387; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) 9388; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 9389; GFX9-DPP-NEXT: .LBB13_2: ; %atomicrmw.start 9390; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 9391; GFX9-DPP-NEXT: v_max_f32_e32 v0, v1, v1 9392; GFX9-DPP-NEXT: v_max_f32_e32 v0, 4.0, v0 9393; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc 9394; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) 9395; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 9396; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 9397; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 9398; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] 9399; GFX9-DPP-NEXT: s_cbranch_execnz .LBB13_2 9400; GFX9-DPP-NEXT: .LBB13_3: 9401; GFX9-DPP-NEXT: s_endpgm 9402; 9403; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9404; GFX1064-DPP: ; %bb.0: 9405; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9406; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9407; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 9408; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc 9409; GFX1064-DPP-NEXT: s_cbranch_execz .LBB13_2 9410; GFX1064-DPP-NEXT: ; %bb.1: 9411; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9412; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0 9413; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9414; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) 9415; GFX1064-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 9416; GFX1064-DPP-NEXT: .LBB13_2: 9417; GFX1064-DPP-NEXT: s_endpgm 9418; 9419; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9420; GFX1032-DPP: ; %bb.0: 9421; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9422; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 9423; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo 9424; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_2 9425; GFX1032-DPP-NEXT: ; %bb.1: 9426; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 9427; GFX1032-DPP-NEXT: 
v_mov_b32_e32 v0, 0 9428; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9429; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) 9430; GFX1032-DPP-NEXT: global_atomic_fmax v0, v1, s[0:1] 9431; GFX1032-DPP-NEXT: .LBB13_2: 9432; GFX1032-DPP-NEXT: s_endpgm 9433; 9434; GFX1164-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9435; GFX1164-DPP: ; %bb.0: 9436; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9437; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec 9438; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 9439; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 9440; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 9441; GFX1164-DPP-NEXT: s_cbranch_execz .LBB13_2 9442; GFX1164-DPP-NEXT: ; %bb.1: 9443; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9444; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0 9445; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 4.0 9446; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) 9447; GFX1164-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9448; GFX1164-DPP-NEXT: .LBB13_2: 9449; GFX1164-DPP-NEXT: s_endpgm 9450; 9451; GFX1132-DPP-LABEL: global_atomic_fmax_uni_address_uni_value_system_scope__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: 9452; GFX1132-DPP: ; %bb.0: 9453; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 9454; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo 9455; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) 9456; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 9457; GFX1132-DPP-NEXT: s_cbranch_execz .LBB13_2 9458; GFX1132-DPP-NEXT: ; %bb.1: 9459; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 9460; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 9461; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) 9462; GFX1132-DPP-NEXT: global_atomic_max_f32 v0, v1, s[0:1] 9463; GFX1132-DPP-NEXT: .LBB13_2: 9464; GFX1132-DPP-NEXT: s_endpgm 9465 %result = atomicrmw fmax ptr addrspace(1) %ptr, float 4.0 monotonic, align 4, !amdgpu.no.fine.grained.memory !1, 
!amdgpu.no.remote.memory !1 9466 ret void 9467} 9468 9469attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 9470 9471!llvm.module.flags = !{!0} 9472!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} 9473!1 = !{} 9474 9475