1611212fcSMatt Arsenault; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s 3611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s 4611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s 5611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s 6611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s 7611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s 8611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s 9611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s 10611212fcSMatt Arsenault 11611212fcSMatt Arsenault; TODO: Delete this and add run lines to use *-atomicrmw-fmin.ll tests 12611212fcSMatt Arsenault 13611212fcSMatt Arsenaultdefine float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) { 14611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_ret_f32: 15611212fcSMatt Arsenault; GFX12: ; %bb.0: 16611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 17611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 18611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 19611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 20611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 21611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 22611212fcSMatt Arsenault; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1 23611212fcSMatt Arsenault; GFX12-NEXT: s_wait_dscnt 0x0 24611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_SE 25611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 26611212fcSMatt Arsenault; 27611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_ret_f32: 28611212fcSMatt Arsenault; GFX940: ; %bb.0: 29611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30611212fcSMatt Arsenault; GFX940-NEXT: ds_min_rtn_f32 v0, v0, v1 31611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt lgkmcnt(0) 32611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 33611212fcSMatt Arsenault; 34611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_ret_f32: 35611212fcSMatt Arsenault; GFX11: ; %bb.0: 36611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 37611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 38611212fcSMatt Arsenault; GFX11-NEXT: ds_min_rtn_f32 v0, v0, v1 39611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt lgkmcnt(0) 40611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 41611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 42611212fcSMatt Arsenault; 43611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_ret_f32: 44611212fcSMatt Arsenault; GFX10: ; %bb.0: 45611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 46611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 47611212fcSMatt Arsenault; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 48611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 49611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 50611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 51611212fcSMatt Arsenault; 52611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_ret_f32: 53611212fcSMatt Arsenault; GFX90A: ; %bb.0: 54611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 55611212fcSMatt Arsenault; GFX90A-NEXT: ds_min_rtn_f32 v0, v0, v1 56611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 57611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 58611212fcSMatt Arsenault; 59611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_ret_f32: 60611212fcSMatt Arsenault; GFX908: ; %bb.0: 61611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62611212fcSMatt Arsenault; GFX908-NEXT: ds_min_rtn_f32 v0, v0, v1 63611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt lgkmcnt(0) 64611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 65611212fcSMatt Arsenault; 66611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_ret_f32: 67611212fcSMatt Arsenault; GFX8: ; %bb.0: 68611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 69611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b32 m0, -1 70611212fcSMatt Arsenault; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 71611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 72611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 73611212fcSMatt Arsenault; 74611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_ret_f32: 75611212fcSMatt Arsenault; GFX7: ; %bb.0: 76611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 77611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 m0, -1 78611212fcSMatt Arsenault; GFX7-NEXT: ds_min_rtn_f32 v0, v0, v1 79611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 80611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 81611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(3) %ptr, float %val seq_cst 82611212fcSMatt Arsenault ret float %result 83611212fcSMatt Arsenault} 84611212fcSMatt Arsenault 85611212fcSMatt Arsenaultdefine void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) { 86611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_noret_f32: 87611212fcSMatt Arsenault; GFX12: ; %bb.0: 88611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 89611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 90611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 91611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 92611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 93611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 94611212fcSMatt Arsenault; GFX12-NEXT: ds_min_num_f32 v0, v1 95611212fcSMatt Arsenault; GFX12-NEXT: s_wait_dscnt 0x0 96611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_SE 97611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 98611212fcSMatt Arsenault; 99611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_noret_f32: 100611212fcSMatt Arsenault; GFX940: ; %bb.0: 101611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 102611212fcSMatt Arsenault; GFX940-NEXT: ds_min_f32 v0, v1 103611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt lgkmcnt(0) 104611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 105611212fcSMatt Arsenault; 106611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_noret_f32: 107611212fcSMatt Arsenault; GFX11: ; %bb.0: 108611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 109611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 110611212fcSMatt Arsenault; GFX11-NEXT: ds_min_f32 v0, v1 111611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt lgkmcnt(0) 112611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 113611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 114611212fcSMatt Arsenault; 115611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_noret_f32: 116611212fcSMatt Arsenault; GFX10: ; %bb.0: 117611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 118611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 119611212fcSMatt Arsenault; GFX10-NEXT: ds_min_f32 v0, v1 120611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 121611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 122611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 123611212fcSMatt Arsenault; 124611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_noret_f32: 125611212fcSMatt Arsenault; GFX90A: ; %bb.0: 126611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 127611212fcSMatt Arsenault; GFX90A-NEXT: ds_min_f32 v0, v1 128611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 129611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 130611212fcSMatt Arsenault; 131611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_noret_f32: 132611212fcSMatt Arsenault; GFX908: ; %bb.0: 133611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 134611212fcSMatt Arsenault; GFX908-NEXT: ds_min_f32 v0, v1 135611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt lgkmcnt(0) 136611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 137611212fcSMatt Arsenault; 138611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_noret_f32: 139611212fcSMatt Arsenault; GFX8: ; %bb.0: 140611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 141611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b32 m0, -1 142611212fcSMatt Arsenault; GFX8-NEXT: ds_min_f32 v0, v1 143611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 144611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 145611212fcSMatt Arsenault; 146611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_noret_f32: 147611212fcSMatt Arsenault; GFX7: ; %bb.0: 148611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 149611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 m0, -1 150611212fcSMatt Arsenault; GFX7-NEXT: ds_min_f32 v0, v1 151611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 152611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 153611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(3) %ptr, float %val seq_cst 154611212fcSMatt Arsenault ret void 155611212fcSMatt Arsenault} 156611212fcSMatt Arsenault 157611212fcSMatt Arsenaultdefine double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) { 158611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_ret_f64: 159611212fcSMatt Arsenault; GFX12: ; %bb.0: 160611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 161611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 162611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 163611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 164611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 165611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 166611212fcSMatt Arsenault; GFX12-NEXT: ds_min_num_rtn_f64 v[0:1], v0, v[1:2] 167611212fcSMatt Arsenault; GFX12-NEXT: s_wait_dscnt 0x0 168611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_SE 169611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 170611212fcSMatt Arsenault; 171611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_ret_f64: 172611212fcSMatt Arsenault; GFX940: ; %bb.0: 173611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 174611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v4, v1 175611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v5, v2 176611212fcSMatt Arsenault; GFX940-NEXT: ds_min_rtn_f64 v[0:1], v0, v[4:5] 177611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt lgkmcnt(0) 178611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 179611212fcSMatt Arsenault; 180611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_ret_f64: 181611212fcSMatt Arsenault; GFX11: ; %bb.0: 182611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 183611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 184611212fcSMatt Arsenault; GFX11-NEXT: ds_min_rtn_f64 v[0:1], v0, v[1:2] 185611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt lgkmcnt(0) 186611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 187611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 188611212fcSMatt Arsenault; 189611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_ret_f64: 190611212fcSMatt Arsenault; GFX10: ; %bb.0: 191611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 192611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 193611212fcSMatt Arsenault; GFX10-NEXT: ds_min_rtn_f64 v[0:1], v0, v[1:2] 194611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 195611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 196611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 197611212fcSMatt Arsenault; 198611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_ret_f64: 199611212fcSMatt Arsenault; GFX90A: ; %bb.0: 200611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 201611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v4, v1 202611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v5, v2 203611212fcSMatt Arsenault; GFX90A-NEXT: ds_min_rtn_f64 v[0:1], v0, v[4:5] 204611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 205611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 206611212fcSMatt Arsenault; 207611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_ret_f64: 208611212fcSMatt Arsenault; GFX908: ; %bb.0: 209611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210611212fcSMatt Arsenault; GFX908-NEXT: ds_min_rtn_f64 v[0:1], v0, v[1:2] 211611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt lgkmcnt(0) 212611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 213611212fcSMatt Arsenault; 214611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_ret_f64: 215611212fcSMatt Arsenault; GFX8: ; %bb.0: 216611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 217611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b32 m0, -1 218611212fcSMatt Arsenault; GFX8-NEXT: ds_min_rtn_f64 v[0:1], v0, v[1:2] 219611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 220611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 221611212fcSMatt Arsenault; 222611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_ret_f64: 223611212fcSMatt Arsenault; GFX7: ; %bb.0: 224611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 225611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 m0, -1 226611212fcSMatt Arsenault; GFX7-NEXT: ds_min_rtn_f64 v[0:1], v0, v[1:2] 227611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 228611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 229611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(3) %ptr, double %val seq_cst 230611212fcSMatt Arsenault ret double %result 231611212fcSMatt Arsenault} 232611212fcSMatt Arsenault 233611212fcSMatt Arsenaultdefine void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) { 234611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_noret_f64: 235611212fcSMatt Arsenault; GFX12: ; %bb.0: 236611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 237611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 238611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 239611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 240611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 241611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 242611212fcSMatt Arsenault; GFX12-NEXT: ds_min_num_f64 v0, v[1:2] 243611212fcSMatt Arsenault; GFX12-NEXT: s_wait_dscnt 0x0 244611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_SE 245611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 246611212fcSMatt Arsenault; 247611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_noret_f64: 248611212fcSMatt Arsenault; GFX940: ; %bb.0: 249611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 250611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v4, v1 251611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v5, v2 252611212fcSMatt Arsenault; GFX940-NEXT: ds_min_f64 v0, v[4:5] 253611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt lgkmcnt(0) 254611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 255611212fcSMatt Arsenault; 256611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_noret_f64: 257611212fcSMatt Arsenault; GFX11: ; %bb.0: 258611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 259611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 260611212fcSMatt Arsenault; GFX11-NEXT: ds_min_f64 v0, v[1:2] 261611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt lgkmcnt(0) 262611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 263611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 264611212fcSMatt Arsenault; 265611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_noret_f64: 266611212fcSMatt Arsenault; GFX10: ; %bb.0: 267611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 268611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 269611212fcSMatt Arsenault; GFX10-NEXT: ds_min_f64 v0, v[1:2] 270611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 271611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 272611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 273611212fcSMatt Arsenault; 274611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_noret_f64: 275611212fcSMatt Arsenault; GFX90A: ; %bb.0: 276611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 277611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v4, v1 278611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v5, v2 279611212fcSMatt Arsenault; GFX90A-NEXT: ds_min_f64 v0, v[4:5] 280611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 281611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 282611212fcSMatt Arsenault; 283611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_noret_f64: 284611212fcSMatt Arsenault; GFX908: ; %bb.0: 285611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 286611212fcSMatt Arsenault; GFX908-NEXT: ds_min_f64 v0, v[1:2] 287611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt lgkmcnt(0) 288611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 289611212fcSMatt Arsenault; 290611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_noret_f64: 291611212fcSMatt Arsenault; GFX8: ; %bb.0: 292611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 293611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b32 m0, -1 294611212fcSMatt Arsenault; GFX8-NEXT: ds_min_f64 v0, v[1:2] 295611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 296611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 297611212fcSMatt Arsenault; 298611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_noret_f64: 299611212fcSMatt Arsenault; GFX7: ; %bb.0: 300611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 301611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 m0, -1 302611212fcSMatt Arsenault; GFX7-NEXT: ds_min_f64 v0, v[1:2] 303611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 304611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 305611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(3) %ptr, double %val seq_cst 306611212fcSMatt Arsenault ret void 307611212fcSMatt Arsenault} 308611212fcSMatt Arsenault 309611212fcSMatt Arsenaultdefine float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %val) { 310611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 311611212fcSMatt Arsenault; GFX12: ; %bb.0: 312611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 313611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 314611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 315611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 316611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 317611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 31841439d5bSMatt Arsenault; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV 319611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 320611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 321611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 322611212fcSMatt Arsenault; 323611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 324611212fcSMatt Arsenault; GFX940: ; %bb.0: 325611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 326611212fcSMatt Arsenault; GFX940-NEXT: global_load_dword v3, v[0:1], off 327611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[0:1], 0 328*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 329611212fcSMatt Arsenault; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start 330611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 331611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 332611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v5, v3 333*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 334*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 335611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 336611212fcSMatt Arsenault; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 337611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 338611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 339611212fcSMatt Arsenault; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 340611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 341611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] 342611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB4_1 343611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 344611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] 345611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v0, v3 346611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 347611212fcSMatt Arsenault; 348611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 349611212fcSMatt Arsenault; GFX11: ; %bb.0: 350611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 351611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 35241439d5bSMatt Arsenault; GFX11-NEXT: global_atomic_min_f32 v0, v[0:1], v2, off glc 353611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 354611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 355611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 356611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 357611212fcSMatt Arsenault; 358611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 359611212fcSMatt Arsenault; GFX10: ; %bb.0: 360611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 361611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 36241439d5bSMatt Arsenault; GFX10-NEXT: global_atomic_fmin v0, v[0:1], v2, off glc 363611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) 364611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 365611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 366611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 367611212fcSMatt Arsenault; 368611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 369611212fcSMatt Arsenault; GFX90A: ; %bb.0: 370611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 371611212fcSMatt Arsenault; GFX90A-NEXT: global_load_dword v3, v[0:1], off 372611212fcSMatt Arsenault; GFX90A-NEXT: s_mov_b64 s[4:5], 0 373*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v2, v2, v2 374611212fcSMatt Arsenault; GFX90A-NEXT: .LBB4_1: ; %atomicrmw.start 375611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 376611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 377611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v5, v3 378*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v3, v5, v5 379*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v4, v3, v2 380611212fcSMatt Arsenault; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off glc 381611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 382611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 383611212fcSMatt Arsenault; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 384611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 385611212fcSMatt Arsenault; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 386611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB4_1 387611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 388611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 389611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v0, v3 390611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 391611212fcSMatt Arsenault; 392611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 393611212fcSMatt Arsenault; GFX908: ; %bb.0: 394611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 395611212fcSMatt Arsenault; GFX908-NEXT: global_load_dword v3, v[0:1], off 396611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 397*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v2, v2, v2 398611212fcSMatt Arsenault; GFX908-NEXT: .LBB4_1: ; %atomicrmw.start 399611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 400611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 401611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v4, v3 402*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v3, v4, v4 403*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v3, v3, v2 404611212fcSMatt Arsenault; GFX908-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc 405611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 406611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 407611212fcSMatt Arsenault; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 408611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 409611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 410611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB4_1 411611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 412611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 413611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v0, v3 414611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 415611212fcSMatt Arsenault; 416611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 417611212fcSMatt Arsenault; GFX8: ; %bb.0: 418611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 419611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 420611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 421611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 422611212fcSMatt Arsenault; GFX8-NEXT: .LBB4_1: ; %atomicrmw.start 423611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 424611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 425611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, v3 426611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4 427611212fcSMatt Arsenault; GFX8-NEXT: v_min_f32_e32 v3, v3, v2 428611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc 429611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 430611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 431611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 432611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 433611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 434611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB4_1 435611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 436611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 437611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v3 438611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 439611212fcSMatt Arsenault; 440611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 441611212fcSMatt Arsenault; GFX7: ; %bb.0: 442611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 443611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s6, 0 444611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s7, 0xf000 445611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 44641439d5bSMatt Arsenault; GFX7-NEXT: buffer_atomic_fmin v2, v[0:1], s[4:7], 0 addr64 glc 447611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 448611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 44941439d5bSMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v2 450611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 451611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 452611212fcSMatt Arsenault ret float %result 453611212fcSMatt Arsenault} 454611212fcSMatt Arsenault 455611212fcSMatt Arsenaultdefine void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %val) { 456611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 457611212fcSMatt Arsenault; GFX12: ; %bb.0: 458611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 459611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 460611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 461611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 462611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 463611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 46441439d5bSMatt Arsenault; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV 46541439d5bSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 466611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 467611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 468611212fcSMatt Arsenault; 469611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 470611212fcSMatt Arsenault; GFX940: ; %bb.0: 471611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 472*eeac0ffaSNikita Popov; GFX940-NEXT: global_load_dword v3, v[0:1], off 473611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[0:1], 0 474*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 475611212fcSMatt Arsenault; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start 476611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 477611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 478*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 479*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 480611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 481*eeac0ffaSNikita Popov; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 482611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 483611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 484*eeac0ffaSNikita Popov; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 485611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 486*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v3, v2 487611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] 488611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB5_1 489611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 490611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] 491611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 492611212fcSMatt Arsenault; 493611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 494611212fcSMatt Arsenault; GFX11: ; %bb.0: 495611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 496611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 49741439d5bSMatt Arsenault; GFX11-NEXT: global_atomic_min_f32 v[0:1], v2, off 49841439d5bSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 499611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 500611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 501611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 502611212fcSMatt Arsenault; 503611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 504611212fcSMatt Arsenault; GFX10: ; %bb.0: 505611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 506611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 50741439d5bSMatt Arsenault; GFX10-NEXT: global_atomic_fmin v[0:1], v2, off 50841439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 509611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 510611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 511611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 512611212fcSMatt Arsenault; 513611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 514611212fcSMatt Arsenault; GFX90A: ; %bb.0: 515611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 516*eeac0ffaSNikita Popov; GFX90A-NEXT: global_load_dword v3, v[0:1], off 517611212fcSMatt Arsenault; GFX90A-NEXT: s_mov_b64 s[4:5], 0 518*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v4, v2, v2 519611212fcSMatt Arsenault; GFX90A-NEXT: .LBB5_1: ; %atomicrmw.start 520611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 521611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 522*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3 523*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4 524*eeac0ffaSNikita Popov; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc 525611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 526611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 527*eeac0ffaSNikita Popov; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 528611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 529*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v3, v2 530611212fcSMatt Arsenault; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 531611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB5_1 532611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 533611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 534611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 535611212fcSMatt Arsenault; 536611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 537611212fcSMatt Arsenault; GFX908: ; %bb.0: 538611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 539*eeac0ffaSNikita Popov; GFX908-NEXT: global_load_dword v3, v[0:1], off 540611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 541*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v4, v2, v2 542611212fcSMatt Arsenault; GFX908-NEXT: .LBB5_1: ; %atomicrmw.start 543611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 544611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 545*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v2, v3, v3 546*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v2, v2, v4 547*eeac0ffaSNikita Popov; GFX908-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc 548611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 549611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 550*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 551611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 552*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v3, v2 553611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 554611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB5_1 555611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 556611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 557611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 558611212fcSMatt Arsenault; 559611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 560611212fcSMatt Arsenault; GFX8: ; %bb.0: 561611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 562611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 563611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 564611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v2 565611212fcSMatt Arsenault; GFX8-NEXT: .LBB5_1: ; %atomicrmw.start 566611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 567611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 568611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3 569611212fcSMatt Arsenault; GFX8-NEXT: v_min_f32_e32 v2, v2, v4 570611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 571611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 572611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 573611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 574611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 575611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v2 576611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 577611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB5_1 578611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 579611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 580611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 581611212fcSMatt Arsenault; 582611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 583611212fcSMatt Arsenault; GFX7: ; %bb.0: 584611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 585611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s6, 0 586611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s7, 0xf000 587611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 58841439d5bSMatt Arsenault; GFX7-NEXT: buffer_atomic_fmin v2, v[0:1], s[4:7], 0 addr64 589611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 590611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 591611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 592611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 593611212fcSMatt Arsenault ret void 594611212fcSMatt Arsenault} 595611212fcSMatt Arsenault 596611212fcSMatt Arsenaultdefine double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %val) { 597611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 598611212fcSMatt Arsenault; GFX12: ; %bb.0: 599611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 600611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 601611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 602611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 603611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 604611212fcSMatt Arsenault; GFX12-NEXT: global_load_b64 v[4:5], v[0:1], off 605*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 606611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s0, 0 607611212fcSMatt Arsenault; GFX12-NEXT: .LBB6_1: ; %atomicrmw.start 608611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 609611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 610611212fcSMatt Arsenault; GFX12-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4 611*eeac0ffaSNikita Popov; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 612*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7] 613*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[2:3] 614611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 615b3a44665SPierre van Houtryve; GFX12-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV 616611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 617611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 618611212fcSMatt Arsenault; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7] 61986627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 620611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 62186627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 622611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 623611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB6_1 624611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 625611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 626611212fcSMatt Arsenault; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 62786627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 628611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 629611212fcSMatt Arsenault; 630611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 631611212fcSMatt Arsenault; GFX940: ; %bb.0: 632611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 633611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 63441439d5bSMatt Arsenault; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 635611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 636611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 637611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 638611212fcSMatt Arsenault; 639611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 640611212fcSMatt Arsenault; GFX11: ; %bb.0: 641611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 642611212fcSMatt Arsenault; GFX11-NEXT: global_load_b64 v[4:5], v[0:1], off 643*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 644611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s0, 0 645611212fcSMatt Arsenault; GFX11-NEXT: .LBB6_1: ; %atomicrmw.start 646611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 647611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 648611212fcSMatt Arsenault; GFX11-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4 649*eeac0ffaSNikita Popov; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 650*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 651*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 652611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 653611212fcSMatt Arsenault; GFX11-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off glc 654611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 655611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 656611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 657611212fcSMatt Arsenault; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7] 658611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 659611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 660611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 661611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB6_1 662611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 663611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 664611212fcSMatt Arsenault; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 665611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 666611212fcSMatt Arsenault; 667611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 668611212fcSMatt Arsenault; GFX10: ; %bb.0: 669611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 670611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 67141439d5bSMatt Arsenault; GFX10-NEXT: global_atomic_fmin_x2 v[0:1], v[0:1], v[2:3], off glc 672611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) 673611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 674611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 675611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 676611212fcSMatt Arsenault; 677611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 678611212fcSMatt Arsenault; GFX90A: ; %bb.0: 679611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 68041439d5bSMatt Arsenault; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc 681611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 682611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 683611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 684611212fcSMatt Arsenault; 685611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 686611212fcSMatt Arsenault; GFX908: ; %bb.0: 687611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 688611212fcSMatt Arsenault; GFX908-NEXT: global_load_dwordx2 v[4:5], v[0:1], off 689*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 690611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 691611212fcSMatt Arsenault; GFX908-NEXT: .LBB6_1: ; %atomicrmw.start 692611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 693611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 694611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v7, v5 695611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v6, v4 696*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 697*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 698611212fcSMatt Arsenault; GFX908-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc 699611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 700611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 701611212fcSMatt Arsenault; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 702611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 703611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 704611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB6_1 705611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 706611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 707611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v0, v4 708611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v1, v5 709611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 710611212fcSMatt Arsenault; 711611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 712611212fcSMatt Arsenault; GFX8: ; %bb.0: 713611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 714611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 715*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 716611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 717611212fcSMatt Arsenault; GFX8-NEXT: .LBB6_1: ; %atomicrmw.start 718611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 719611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 720611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 721611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 722*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 723*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 724611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 725611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 726611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 727611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 728611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 729611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 730611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB6_1 731611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 732611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 733611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 734611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 735611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 736611212fcSMatt Arsenault; 737611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 738611212fcSMatt Arsenault; GFX7: ; %bb.0: 739611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s6, 0 741611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s7, 0xf000 742611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 74341439d5bSMatt Arsenault; GFX7-NEXT: buffer_atomic_fmin_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc 744611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 745611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 74641439d5bSMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v2 74741439d5bSMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v3 748611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 749611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 750611212fcSMatt Arsenault ret double %result 751611212fcSMatt Arsenault} 752611212fcSMatt Arsenault 753611212fcSMatt Arsenaultdefine void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %val) { 754611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 755611212fcSMatt Arsenault; GFX12: ; %bb.0: 756611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 757611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 758611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 759611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 760611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 761*eeac0ffaSNikita Popov; GFX12-NEXT: global_load_b64 v[4:5], v[0:1], off 762*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[2:3], v[2:3] 763611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s0, 0 764611212fcSMatt Arsenault; GFX12-NEXT: .LBB7_1: ; %atomicrmw.start 765611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 766611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 767*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5] 768611212fcSMatt Arsenault; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 769*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] 770611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 771*eeac0ffaSNikita Popov; GFX12-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV 772611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 773611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 774*eeac0ffaSNikita Popov; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5] 775*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2 77686627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 777611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 77886627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 779611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 780611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB7_1 781611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 782611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 78386627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 784611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 785611212fcSMatt Arsenault; 786611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 787611212fcSMatt Arsenault; GFX940: ; %bb.0: 788611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 789611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 79041439d5bSMatt Arsenault; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off 791611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 792611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 793611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 794611212fcSMatt Arsenault; 795611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 796611212fcSMatt Arsenault; GFX11: ; %bb.0: 797611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 798*eeac0ffaSNikita Popov; GFX11-NEXT: global_load_b64 v[4:5], v[0:1], off 799*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 800611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s0, 0 801611212fcSMatt Arsenault; GFX11-NEXT: .LBB7_1: ; %atomicrmw.start 802611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 803611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 804*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 805611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 806*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 807611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 808*eeac0ffaSNikita Popov; GFX11-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off glc 809611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 810611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 811611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 812*eeac0ffaSNikita Popov; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5] 813*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2 814611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 815611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 816611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 817611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB7_1 818611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 819611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 820611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 821611212fcSMatt Arsenault; 822611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 823611212fcSMatt Arsenault; GFX10: ; %bb.0: 824611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 825611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 82641439d5bSMatt Arsenault; GFX10-NEXT: global_atomic_fmin_x2 v[0:1], v[2:3], off 82741439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 828611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 829611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 830611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 831611212fcSMatt Arsenault; 832611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 833611212fcSMatt Arsenault; GFX90A: ; %bb.0: 834611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 83541439d5bSMatt Arsenault; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off 836611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 837611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 838611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 839611212fcSMatt Arsenault; 840611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 841611212fcSMatt Arsenault; GFX908: ; %bb.0: 842611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 843*eeac0ffaSNikita Popov; GFX908-NEXT: global_load_dwordx2 v[4:5], v[0:1], off 844*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 845611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 846611212fcSMatt Arsenault; GFX908-NEXT: .LBB7_1: ; %atomicrmw.start 847611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 848611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 849*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 850*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 851*eeac0ffaSNikita Popov; GFX908-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc 852611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 853611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 854*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] 855*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v5, v3 856611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 857*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v4, v2 858611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 859611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB7_1 860611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 861611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 862611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 863611212fcSMatt Arsenault; 864611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 865611212fcSMatt Arsenault; GFX8: ; %bb.0: 866611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 867*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 868*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 869611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 870611212fcSMatt Arsenault; GFX8-NEXT: .LBB7_1: ; %atomicrmw.start 871611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 872611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 873*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 874*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 875*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc 876611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 877611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 878*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] 879*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, v3 880611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 881*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, v2 882611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 883611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB7_1 884611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 885611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 886611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 887611212fcSMatt Arsenault; 888611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 889611212fcSMatt Arsenault; GFX7: ; %bb.0: 890611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 891611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s6, 0 892611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b32 s7, 0xf000 893611212fcSMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 89441439d5bSMatt Arsenault; GFX7-NEXT: buffer_atomic_fmin_x2 v[2:3], v[0:1], s[4:7], 0 addr64 895611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 896611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 897611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 898611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 899611212fcSMatt Arsenault ret void 900611212fcSMatt Arsenault} 901611212fcSMatt Arsenault 902611212fcSMatt Arsenaultdefine float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr %ptr, float %val) { 903611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 904611212fcSMatt Arsenault; GFX12: ; %bb.0: 905611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 906611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 907611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 908611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 909611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 910611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 91141439d5bSMatt Arsenault; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV 912611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 913611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 914611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 915611212fcSMatt Arsenault; 916611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 917611212fcSMatt Arsenault; GFX940: ; %bb.0: 918611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 919611212fcSMatt Arsenault; GFX940-NEXT: flat_load_dword v3, v[0:1] 920611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[0:1], 0 921*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 922611212fcSMatt Arsenault; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start 923611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 924611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 925611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v5, v3 926*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 927*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 928611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 929611212fcSMatt Arsenault; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 930611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 931611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 932611212fcSMatt Arsenault; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 933611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 934611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] 935611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB8_1 936611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 937611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] 938611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v0, v3 939611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 940611212fcSMatt Arsenault; 941611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 942611212fcSMatt Arsenault; GFX11: ; %bb.0: 943611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 944611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 94541439d5bSMatt Arsenault; GFX11-NEXT: flat_atomic_min_f32 v0, v[0:1], v2 glc 946611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 947611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 948611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 949611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 950611212fcSMatt Arsenault; 951611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 952611212fcSMatt Arsenault; GFX10: ; %bb.0: 953611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 95541439d5bSMatt Arsenault; GFX10-NEXT: flat_atomic_fmin v0, v[0:1], v2 glc 956611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 957611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 958611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 959611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 960611212fcSMatt Arsenault; 961611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 962611212fcSMatt Arsenault; GFX90A: ; %bb.0: 963611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 964611212fcSMatt Arsenault; GFX90A-NEXT: flat_load_dword v3, v[0:1] 965611212fcSMatt Arsenault; GFX90A-NEXT: s_mov_b64 s[4:5], 0 966*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v2, v2, v2 967611212fcSMatt Arsenault; GFX90A-NEXT: .LBB8_1: ; %atomicrmw.start 968611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 969611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 970611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v5, v3 971*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v3, v5, v5 972*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v4, v3, v2 973611212fcSMatt Arsenault; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc 974611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 975611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 976611212fcSMatt Arsenault; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 977611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 978611212fcSMatt Arsenault; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 979611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB8_1 980611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 981611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 982611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v0, v3 983611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 984611212fcSMatt Arsenault; 985611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 986611212fcSMatt Arsenault; GFX908: ; %bb.0: 987611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 988611212fcSMatt Arsenault; GFX908-NEXT: flat_load_dword v3, v[0:1] 989611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 990*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v2, v2, v2 991611212fcSMatt Arsenault; GFX908-NEXT: .LBB8_1: ; %atomicrmw.start 992611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 993611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 994611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v4, v3 995*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v3, v4, v4 996*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v3, v3, v2 997611212fcSMatt Arsenault; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc 998611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 999611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1000611212fcSMatt Arsenault; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 1001611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1002611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1003611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB8_1 1004611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 1005611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1006611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v0, v3 1007611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1008611212fcSMatt Arsenault; 1009611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1010611212fcSMatt Arsenault; GFX8: ; %bb.0: 1011611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1012611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 1013611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1014611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 1015611212fcSMatt Arsenault; GFX8-NEXT: .LBB8_1: ; %atomicrmw.start 1016611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1017611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1018611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, v3 1019611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4 1020611212fcSMatt Arsenault; GFX8-NEXT: v_min_f32_e32 v3, v3, v2 1021611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc 1022611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1023611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1024611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 1025611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1026611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1027611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB8_1 1028611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 1029611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1030611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v3 1031611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1032611212fcSMatt Arsenault; 1033611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1034611212fcSMatt Arsenault; GFX7: ; %bb.0: 1035611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103641439d5bSMatt Arsenault; GFX7-NEXT: flat_atomic_fmin v0, v[0:1], v2 glc 1037611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1038611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1039611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1040611212fcSMatt Arsenault %result = atomicrmw fmin ptr %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1041611212fcSMatt Arsenault ret float %result 1042611212fcSMatt Arsenault} 1043611212fcSMatt Arsenault 1044611212fcSMatt Arsenaultdefine void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr %ptr, float %val) { 1045611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1046611212fcSMatt Arsenault; GFX12: ; %bb.0: 1047611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1048611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1049611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1050611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1051611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 1052611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 105341439d5bSMatt Arsenault; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV 105441439d5bSMatt Arsenault; GFX12-NEXT: s_wait_storecnt_dscnt 0x0 1055611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1056611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1057611212fcSMatt Arsenault; 1058611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1059611212fcSMatt Arsenault; GFX940: ; %bb.0: 1060611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1061*eeac0ffaSNikita Popov; GFX940-NEXT: flat_load_dword v3, v[0:1] 1062611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[0:1], 0 1063*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 1064611212fcSMatt Arsenault; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start 1065611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 1066611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1067*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 1068*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 1069611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 1070*eeac0ffaSNikita Popov; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 1071611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1072611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1073*eeac0ffaSNikita Popov; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 1074611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1075*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v3, v2 1076611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] 1077611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB9_1 1078611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 1079611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] 1080611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1081611212fcSMatt Arsenault; 1082611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1083611212fcSMatt Arsenault; GFX11: ; %bb.0: 1084611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 108641439d5bSMatt Arsenault; GFX11-NEXT: flat_atomic_min_f32 v[0:1], v2 108741439d5bSMatt Arsenault; GFX11-NEXT: s_waitcnt lgkmcnt(0) 108841439d5bSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1089611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1090611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1091611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1092611212fcSMatt Arsenault; 1093611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1094611212fcSMatt Arsenault; GFX10: ; %bb.0: 1095611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1096611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 109741439d5bSMatt Arsenault; GFX10-NEXT: flat_atomic_fmin v[0:1], v2 109841439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 109941439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1100611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1101611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1102611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1103611212fcSMatt Arsenault; 1104611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1105611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1106611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1107*eeac0ffaSNikita Popov; GFX90A-NEXT: flat_load_dword v3, v[0:1] 1108611212fcSMatt Arsenault; GFX90A-NEXT: s_mov_b64 s[4:5], 0 1109*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v4, v2, v2 1110611212fcSMatt Arsenault; GFX90A-NEXT: .LBB9_1: ; %atomicrmw.start 1111611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 1112611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1113*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3 1114*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4 1115*eeac0ffaSNikita Popov; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 1116611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1117611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1118*eeac0ffaSNikita Popov; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 1119611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1120*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1121611212fcSMatt Arsenault; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 1122611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB9_1 1123611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 1124611212fcSMatt Arsenault; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 1125611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1126611212fcSMatt Arsenault; 1127611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1128611212fcSMatt Arsenault; GFX908: ; %bb.0: 1129611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130*eeac0ffaSNikita Popov; GFX908-NEXT: flat_load_dword v3, v[0:1] 1131611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 1132*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v4, v2, v2 1133611212fcSMatt Arsenault; GFX908-NEXT: .LBB9_1: ; %atomicrmw.start 1134611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1135611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1136*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v2, v3, v3 1137*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v2, v2, v4 1138*eeac0ffaSNikita Popov; GFX908-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 1139611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1140611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1141*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 1142611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1143*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v3, v2 1144611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1145611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB9_1 1146611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 1147611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1148611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1149611212fcSMatt Arsenault; 1150611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1151611212fcSMatt Arsenault; GFX8: ; %bb.0: 1152611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 1154611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1155611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v2 1156611212fcSMatt Arsenault; GFX8-NEXT: .LBB9_1: ; %atomicrmw.start 1157611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1158611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1159611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3 1160611212fcSMatt Arsenault; GFX8-NEXT: v_min_f32_e32 v2, v2, v4 1161611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 1162611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1163611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1164611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 1165611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1166611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v2 1167611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1168611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB9_1 1169611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 1170611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1171611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1172611212fcSMatt Arsenault; 1173611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1174611212fcSMatt Arsenault; GFX7: ; %bb.0: 1175611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 117641439d5bSMatt Arsenault; GFX7-NEXT: flat_atomic_fmin v[0:1], v2 1177611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1178611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1179611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1180611212fcSMatt Arsenault %unused = atomicrmw fmin ptr %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1181611212fcSMatt Arsenault ret void 1182611212fcSMatt Arsenault} 1183611212fcSMatt Arsenault 1184611212fcSMatt Arsenaultdefine double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr %ptr, double %val) { 1185611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1186611212fcSMatt Arsenault; GFX12: ; %bb.0: 1187611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1188611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1189611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1190611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1191611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 1192611212fcSMatt Arsenault; GFX12-NEXT: flat_load_b64 v[4:5], v[0:1] 1193*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 1194611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s0, 0 1195611212fcSMatt Arsenault; GFX12-NEXT: .LBB10_1: ; %atomicrmw.start 1196611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 1197611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1198611212fcSMatt Arsenault; GFX12-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4 1199*eeac0ffaSNikita Popov; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1200*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7] 1201*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[2:3] 1202611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 1203b3a44665SPierre van Houtryve; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 1204611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1205611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1206611212fcSMatt Arsenault; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7] 120786627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1208611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 120986627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1210611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 1211611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB10_1 1212611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 1213611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 1214611212fcSMatt Arsenault; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 121586627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1216611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1217611212fcSMatt Arsenault; 1218611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1219611212fcSMatt Arsenault; GFX940: ; %bb.0: 1220611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1221611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 122241439d5bSMatt Arsenault; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 1223611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1224611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1225611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1226611212fcSMatt Arsenault; 1227611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1228611212fcSMatt Arsenault; GFX11: ; %bb.0: 1229611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1230611212fcSMatt Arsenault; GFX11-NEXT: flat_load_b64 v[4:5], v[0:1] 1231*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 1232611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s0, 0 1233611212fcSMatt Arsenault; GFX11-NEXT: .LBB10_1: ; %atomicrmw.start 1234611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1235611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1236611212fcSMatt Arsenault; GFX11-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4 1237*eeac0ffaSNikita Popov; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1238*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 1239*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 1240611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1241611212fcSMatt Arsenault; GFX11-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] glc 1242611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1243611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1244611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1245611212fcSMatt Arsenault; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7] 1246611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 1247611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1248611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 1249611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB10_1 1250611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 1251611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 1252611212fcSMatt Arsenault; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 1253611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1254611212fcSMatt Arsenault; 1255611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1256611212fcSMatt Arsenault; GFX10: ; %bb.0: 1257611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1258611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 125941439d5bSMatt Arsenault; GFX10-NEXT: flat_atomic_fmin_x2 v[0:1], v[0:1], v[2:3] glc 1260611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1261611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1262611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1263611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1264611212fcSMatt Arsenault; 1265611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1266611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1267611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 126841439d5bSMatt Arsenault; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc 1269611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1270611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1271611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1272611212fcSMatt Arsenault; 1273611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1274611212fcSMatt Arsenault; GFX908: ; %bb.0: 1275611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1276611212fcSMatt Arsenault; GFX908-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 1277*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 1278611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 1279611212fcSMatt Arsenault; GFX908-NEXT: .LBB10_1: ; %atomicrmw.start 1280611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1281611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1282611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v7, v5 1283611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v6, v4 1284*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 1285*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 1286611212fcSMatt Arsenault; GFX908-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 1287611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1288611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1289611212fcSMatt Arsenault; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1290611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1291611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1292611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB10_1 1293611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 1294611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1295611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v0, v4 1296611212fcSMatt Arsenault; GFX908-NEXT: v_mov_b32_e32 v1, v5 1297611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1298611212fcSMatt Arsenault; 1299611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1300611212fcSMatt Arsenault; GFX8: ; %bb.0: 1301611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1302611212fcSMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 1303611212fcSMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 1304611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v4, v[0:1] 1305611212fcSMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 1306*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 1307611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1308611212fcSMatt Arsenault; GFX8-NEXT: .LBB10_1: ; %atomicrmw.start 1309611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1310611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1311611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 1312611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 1313*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] 1314*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[2:3] 1315611212fcSMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 1316611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1317611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1318611212fcSMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1319611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1320611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1321611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB10_1 1322611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 1323611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1324611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 1325611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 1326611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1327611212fcSMatt Arsenault; 1328611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1329611212fcSMatt Arsenault; GFX7: ; %bb.0: 1330611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 133141439d5bSMatt Arsenault; GFX7-NEXT: flat_atomic_fmin_x2 v[0:1], v[0:1], v[2:3] glc 1332611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1333611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1334611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 13351d037087SMatt Arsenault %result = atomicrmw fmin ptr %ptr, double %val syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1336611212fcSMatt Arsenault ret double %result 1337611212fcSMatt Arsenault} 1338611212fcSMatt Arsenault 1339611212fcSMatt Arsenaultdefine void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr %ptr, double %val) { 1340611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1341611212fcSMatt Arsenault; GFX12: ; %bb.0: 1342611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1343611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1344611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1345611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1346611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 1347*eeac0ffaSNikita Popov; GFX12-NEXT: flat_load_b64 v[4:5], v[0:1] 1348*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[2:3], v[2:3] 1349611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s0, 0 1350611212fcSMatt Arsenault; GFX12-NEXT: .LBB11_1: ; %atomicrmw.start 1351611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 1352611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1353*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5] 1354611212fcSMatt Arsenault; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1355*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] 1356611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 1357*eeac0ffaSNikita Popov; GFX12-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN scope:SCOPE_DEV 1358611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1359611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1360*eeac0ffaSNikita Popov; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5] 1361*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2 136286627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1363611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 136486627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1365611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 1366611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB11_1 1367611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 1368611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 136986627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1370611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1371611212fcSMatt Arsenault; 1372611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1373611212fcSMatt Arsenault; GFX940: ; %bb.0: 1374611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 137641439d5bSMatt Arsenault; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] 1377611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1378611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1379611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1380611212fcSMatt Arsenault; 1381611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1382611212fcSMatt Arsenault; GFX11: ; %bb.0: 1383611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1384*eeac0ffaSNikita Popov; GFX11-NEXT: flat_load_b64 v[4:5], v[0:1] 1385*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 1386611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s0, 0 1387611212fcSMatt Arsenault; GFX11-NEXT: .LBB11_1: ; %atomicrmw.start 1388611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1389611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1390*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 1391611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1392*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 1393611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1394*eeac0ffaSNikita Popov; GFX11-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] glc 1395611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1396611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1397611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1398*eeac0ffaSNikita Popov; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5] 1399*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2 1400611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 1401611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1402611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 1403611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB11_1 1404611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 1405611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 1406611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1407611212fcSMatt Arsenault; 1408611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1409611212fcSMatt Arsenault; GFX10: ; %bb.0: 1410611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1411611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 141241439d5bSMatt Arsenault; GFX10-NEXT: flat_atomic_fmin_x2 v[0:1], v[2:3] 141341439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt lgkmcnt(0) 141441439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1415611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1416611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1417611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1418611212fcSMatt Arsenault; 1419611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1420611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1421611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142241439d5bSMatt Arsenault; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] 1423611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1424611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1425611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1426611212fcSMatt Arsenault; 1427611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1428611212fcSMatt Arsenault; GFX908: ; %bb.0: 1429611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1430*eeac0ffaSNikita Popov; GFX908-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 1431*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 1432611212fcSMatt Arsenault; GFX908-NEXT: s_mov_b64 s[4:5], 0 1433611212fcSMatt Arsenault; GFX908-NEXT: .LBB11_1: ; %atomicrmw.start 1434611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1435611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1436*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 1437*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 1438*eeac0ffaSNikita Popov; GFX908-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc 1439611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1440611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1441*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] 1442*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v5, v3 1443611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1444*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v4, v2 1445611212fcSMatt Arsenault; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1446611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB11_1 1447611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 1448611212fcSMatt Arsenault; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1449611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1450611212fcSMatt Arsenault; 1451611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1452611212fcSMatt Arsenault; GFX8: ; %bb.0: 1453611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1454*eeac0ffaSNikita Popov; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 1455*eeac0ffaSNikita Popov; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 1456*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dword v4, v[0:1] 1457*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dword v5, v[5:6] 1458*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3] 1459611212fcSMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1460611212fcSMatt Arsenault; GFX8-NEXT: .LBB11_1: ; %atomicrmw.start 1461611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1462611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1463*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5] 1464*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 1465*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc 1466611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1467611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1468*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] 1469*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, v3 1470611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1471*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, v2 1472611212fcSMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1473611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB11_1 1474611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 1475611212fcSMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1476611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1477611212fcSMatt Arsenault; 1478611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1479611212fcSMatt Arsenault; GFX7: ; %bb.0: 1480611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 148141439d5bSMatt Arsenault; GFX7-NEXT: flat_atomic_fmin_x2 v[0:1], v[2:3] 1482611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1483611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1484611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 14851d037087SMatt Arsenault %unused = atomicrmw fmin ptr %ptr, double %val syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1486611212fcSMatt Arsenault ret void 1487611212fcSMatt Arsenault} 1488611212fcSMatt Arsenault 1489611212fcSMatt Arsenaultdefine float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, float %val) { 1490611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1491611212fcSMatt Arsenault; GFX12: ; %bb.0: 1492611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1493611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1494611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1495611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1496611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 14976548b635SShilei Tian; GFX12-NEXT: v_mov_b32_e32 v1, s16 1498611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 149941439d5bSMatt Arsenault; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN 1500611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 1501611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1502611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1503611212fcSMatt Arsenault; 1504611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1505611212fcSMatt Arsenault; GFX940: ; %bb.0: 1506611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1507*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v2, s16 1508*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v1, v0 1509*eeac0ffaSNikita Popov; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen 1510611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[4:5], 0 1511*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v3, v1, v1 1512611212fcSMatt Arsenault; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start 1513611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 1514611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 1515611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b32_e32 v5, v0 1516*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 1517*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 1518611212fcSMatt Arsenault; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] 1519611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 1520*eeac0ffaSNikita Popov; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 1521611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 1522611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1523611212fcSMatt Arsenault; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 1524611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1525611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] 1526611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB12_1 1527611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 1528611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] 1529611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1530611212fcSMatt Arsenault; 1531611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1532611212fcSMatt Arsenault; GFX11: ; %bb.0: 1533611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15346548b635SShilei Tian; GFX11-NEXT: v_mov_b32_e32 v1, s16 1535611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 153641439d5bSMatt Arsenault; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc 1537611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 1538611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1539611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1540611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1541611212fcSMatt Arsenault; 1542611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1543611212fcSMatt Arsenault; GFX10: ; %bb.0: 1544611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15456548b635SShilei Tian; GFX10-NEXT: v_mov_b32_e32 v1, s20 1546611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 15476548b635SShilei Tian; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[16:19], 0 offen glc 1548611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) 1549611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1550611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1551611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1552611212fcSMatt Arsenault; 1553611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1554611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1555611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1556*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v2, s20 1557*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1558*eeac0ffaSNikita Popov; GFX90A-NEXT: buffer_load_dword v0, v2, s[16:19], 0 offen 15596548b635SShilei Tian; GFX90A-NEXT: s_mov_b64 s[4:5], 0 1560*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v3, v1, v1 1561611212fcSMatt Arsenault; GFX90A-NEXT: .LBB12_1: ; %atomicrmw.start 1562611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 1563611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 1564611212fcSMatt Arsenault; GFX90A-NEXT: v_mov_b32_e32 v5, v0 1565*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v0, v5, v5 1566*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v4, v0, v3 1567611212fcSMatt Arsenault; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1] 1568*eeac0ffaSNikita Popov; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 1569611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 1570611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1571611212fcSMatt Arsenault; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 15726548b635SShilei Tian; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 15736548b635SShilei Tian; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 1574611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB12_1 1575611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 15766548b635SShilei Tian; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 1577611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1578611212fcSMatt Arsenault; 1579611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1580611212fcSMatt Arsenault; GFX908: ; %bb.0: 1581611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1582*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v2, s20 1583*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v1, v0 1584*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_load_dword v0, v2, s[16:19], 0 offen 15856548b635SShilei Tian; GFX908-NEXT: s_mov_b64 s[4:5], 0 1586*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v3, v1, v1 1587611212fcSMatt Arsenault; GFX908-NEXT: .LBB12_1: ; %atomicrmw.start 1588611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1589611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1590*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v5, v0 1591*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v0, v5, v5 1592*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v4, v0, v3 1593*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v0, v4 1594*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v1, v5 1595*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 1596611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1597611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1598*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 15996548b635SShilei Tian; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 16006548b635SShilei Tian; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1601611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB12_1 1602611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 16036548b635SShilei Tian; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1604611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1605611212fcSMatt Arsenault; 1606611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1607611212fcSMatt Arsenault; GFX8: ; %bb.0: 1608611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1609*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s20 1610611212fcSMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v0 1611*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_load_dword v0, v2, s[16:19], 0 offen 16126548b635SShilei Tian; GFX8-NEXT: s_mov_b64 s[4:5], 0 1613*eeac0ffaSNikita Popov; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v1 1614611212fcSMatt Arsenault; GFX8-NEXT: .LBB12_1: ; %atomicrmw.start 1615611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1616611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1617*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, v0 1618*eeac0ffaSNikita Popov; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v5 1619*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f32_e32 v4, v0, v3 1620*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, v4 1621*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, v5 1622*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc 1623611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1624611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1625*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 16266548b635SShilei Tian; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 16276548b635SShilei Tian; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1628611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB12_1 1629611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 16306548b635SShilei Tian; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1631611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1632611212fcSMatt Arsenault; 1633611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: 1634611212fcSMatt Arsenault; GFX7: ; %bb.0: 1635611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16366548b635SShilei Tian; GFX7-NEXT: v_mov_b32_e32 v1, s20 16376548b635SShilei Tian; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[16:19], 0 offen glc 1638611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 1639611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1640611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1641611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(7) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1642611212fcSMatt Arsenault ret float %result 1643611212fcSMatt Arsenault} 1644611212fcSMatt Arsenault 1645611212fcSMatt Arsenaultdefine void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, float %val) { 1646611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1647611212fcSMatt Arsenault; GFX12: ; %bb.0: 1648611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1649611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1650611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1651611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1652611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 16536548b635SShilei Tian; GFX12-NEXT: v_mov_b32_e32 v1, s16 1654611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 165541439d5bSMatt Arsenault; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen 165641439d5bSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 1657611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1658611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1659611212fcSMatt Arsenault; 1660611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1661611212fcSMatt Arsenault; GFX940: ; %bb.0: 1662611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1663*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v2, s16 1664*eeac0ffaSNikita Popov; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen 1665611212fcSMatt Arsenault; GFX940-NEXT: s_mov_b64 s[4:5], 0 1666*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v3, v0, v0 1667611212fcSMatt Arsenault; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start 1668611212fcSMatt Arsenault; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 1669611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 1670*eeac0ffaSNikita Popov; GFX940-NEXT: v_max_f32_e32 v0, v1, v1 1671*eeac0ffaSNikita Popov; GFX940-NEXT: v_min_f32_e32 v0, v0, v3 1672*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] 1673611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 1674*eeac0ffaSNikita Popov; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 1675611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 1676611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1677*eeac0ffaSNikita Popov; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 1678611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1679*eeac0ffaSNikita Popov; GFX940-NEXT: v_mov_b32_e32 v1, v4 1680611212fcSMatt Arsenault; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] 1681611212fcSMatt Arsenault; GFX940-NEXT: s_cbranch_execnz .LBB13_1 1682611212fcSMatt Arsenault; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end 1683611212fcSMatt Arsenault; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] 1684611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1685611212fcSMatt Arsenault; 1686611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1687611212fcSMatt Arsenault; GFX11: ; %bb.0: 1688611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16896548b635SShilei Tian; GFX11-NEXT: v_mov_b32_e32 v1, s16 1690611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 169141439d5bSMatt Arsenault; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen 169241439d5bSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1693611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1694611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1695611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1696611212fcSMatt Arsenault; 1697611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1698611212fcSMatt Arsenault; GFX10: ; %bb.0: 1699611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17006548b635SShilei Tian; GFX10-NEXT: v_mov_b32_e32 v1, s20 1701611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 17026548b635SShilei Tian; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[16:19], 0 offen 170341439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1704611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1705611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1706611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1707611212fcSMatt Arsenault; 1708611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1709611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1710611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1711*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v2, s20 1712*eeac0ffaSNikita Popov; GFX90A-NEXT: buffer_load_dword v1, v2, s[16:19], 0 offen 17136548b635SShilei Tian; GFX90A-NEXT: s_mov_b64 s[4:5], 0 1714*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v3, v0, v0 1715611212fcSMatt Arsenault; GFX90A-NEXT: .LBB13_1: ; %atomicrmw.start 1716611212fcSMatt Arsenault; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 1717611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 1718*eeac0ffaSNikita Popov; GFX90A-NEXT: v_max_f32_e32 v0, v1, v1 1719*eeac0ffaSNikita Popov; GFX90A-NEXT: v_min_f32_e32 v0, v0, v3 1720*eeac0ffaSNikita Popov; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] 1721*eeac0ffaSNikita Popov; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 1722611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 1723611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1724*eeac0ffaSNikita Popov; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 17256548b635SShilei Tian; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1726*eeac0ffaSNikita Popov; GFX90A-NEXT: v_mov_b32_e32 v1, v4 17276548b635SShilei Tian; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] 1728611212fcSMatt Arsenault; GFX90A-NEXT: s_cbranch_execnz .LBB13_1 1729611212fcSMatt Arsenault; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 17306548b635SShilei Tian; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] 1731611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1732611212fcSMatt Arsenault; 1733611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1734611212fcSMatt Arsenault; GFX908: ; %bb.0: 1735611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1736*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v2, s20 1737*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_load_dword v1, v2, s[16:19], 0 offen 17386548b635SShilei Tian; GFX908-NEXT: s_mov_b64 s[4:5], 0 1739*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v3, v0, v0 1740611212fcSMatt Arsenault; GFX908-NEXT: .LBB13_1: ; %atomicrmw.start 1741611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1742611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1743*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f32_e32 v0, v1, v1 1744*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f32_e32 v0, v0, v3 1745*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v5, v1 1746*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v4, v0 1747*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 1748611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1749611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1750*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 17516548b635SShilei Tian; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1752*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v1, v4 17536548b635SShilei Tian; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1754611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB13_1 1755611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 17566548b635SShilei Tian; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1757611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1758611212fcSMatt Arsenault; 1759611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1760611212fcSMatt Arsenault; GFX8: ; %bb.0: 1761611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1762*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s20 1763*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_load_dword v1, v2, s[16:19], 0 offen 17646548b635SShilei Tian; GFX8-NEXT: s_mov_b64 s[4:5], 0 1765*eeac0ffaSNikita Popov; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0 1766611212fcSMatt Arsenault; GFX8-NEXT: .LBB13_1: ; %atomicrmw.start 1767611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1768611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1769611212fcSMatt Arsenault; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v1 1770*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f32_e32 v0, v0, v3 1771*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, v1 1772*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, v0 1773*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc 1774611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1775611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1776*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 17776548b635SShilei Tian; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1778*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, v4 17796548b635SShilei Tian; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1780611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB13_1 1781611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 17826548b635SShilei Tian; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1783611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1784611212fcSMatt Arsenault; 1785611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: 1786611212fcSMatt Arsenault; GFX7: ; %bb.0: 1787611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17886548b635SShilei Tian; GFX7-NEXT: v_mov_b32_e32 v1, s20 17896548b635SShilei Tian; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[16:19], 0 offen 1790611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 1791611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1792611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1793611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(7) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1794611212fcSMatt Arsenault ret void 1795611212fcSMatt Arsenault} 1796611212fcSMatt Arsenault 1797611212fcSMatt Arsenaultdefine double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, double %val) { 1798611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1799611212fcSMatt Arsenault; GFX12: ; %bb.0: 1800611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1801611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1802611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1803611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1804611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 1805*eeac0ffaSNikita Popov; GFX12-NEXT: v_mov_b32_e32 v6, s16 1806*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1 1807611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s4, 0 1808*eeac0ffaSNikita Popov; GFX12-NEXT: buffer_load_b64 v[0:1], v6, s[0:3], null offen 1809*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3] 1810611212fcSMatt Arsenault; GFX12-NEXT: .LBB14_1: ; %atomicrmw.start 1811611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 1812611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 1813*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 1814611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 1815*eeac0ffaSNikita Popov; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1816*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10] 1817*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[7:8], v[0:1], v[4:5] 1818*eeac0ffaSNikita Popov; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1819*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 1820*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 1821*eeac0ffaSNikita Popov; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN 1822611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 1823611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1824*eeac0ffaSNikita Popov; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10] 182586627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1826611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 182786627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1828611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 1829611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB14_1 1830611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 1831611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 183286627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1833611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 1834611212fcSMatt Arsenault; 1835611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1836611212fcSMatt Arsenault; GFX940: ; %bb.0: 1837611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18386548b635SShilei Tian; GFX940-NEXT: v_mov_b32_e32 v2, s16 1839611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 184041439d5bSMatt Arsenault; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 1841611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 1842611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 1843611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 1844611212fcSMatt Arsenault; 1845611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1846611212fcSMatt Arsenault; GFX11: ; %bb.0: 1847611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1848*eeac0ffaSNikita Popov; GFX11-NEXT: v_mov_b32_e32 v6, s16 1849*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1 1850611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s4, 0 1851*eeac0ffaSNikita Popov; GFX11-NEXT: buffer_load_b64 v[0:1], v6, s[0:3], 0 offen 1852*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3] 1853611212fcSMatt Arsenault; GFX11-NEXT: .LBB14_1: ; %atomicrmw.start 1854611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1855611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 1856*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 1857611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1858*eeac0ffaSNikita Popov; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1859*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10] 1860*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5] 1861*eeac0ffaSNikita Popov; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1862*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 1863*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 1864*eeac0ffaSNikita Popov; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc 1865611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 1866611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 1867611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 1868*eeac0ffaSNikita Popov; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10] 1869611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 1870611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1871611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 1872611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB14_1 1873611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 1874611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 1875611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 1876611212fcSMatt Arsenault; 1877611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1878611212fcSMatt Arsenault; GFX10: ; %bb.0: 1879611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18806548b635SShilei Tian; GFX10-NEXT: v_mov_b32_e32 v2, s20 1881611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 18826548b635SShilei Tian; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen glc 1883611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) 1884611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 1885611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 1886611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 1887611212fcSMatt Arsenault; 1888611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1889611212fcSMatt Arsenault; GFX90A: ; %bb.0: 1890611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18916548b635SShilei Tian; GFX90A-NEXT: v_mov_b32_e32 v2, s20 18926548b635SShilei Tian; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[16:19], 0 offen glc 1893611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 1894611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 1895611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 1896611212fcSMatt Arsenault; 1897611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1898611212fcSMatt Arsenault; GFX908: ; %bb.0: 1899611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1900*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v6, s20 1901*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v2, v0 1902*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v3, v1 1903*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen 1904*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3] 19056548b635SShilei Tian; GFX908-NEXT: s_mov_b64 s[4:5], 0 1906611212fcSMatt Arsenault; GFX908-NEXT: .LBB14_1: ; %atomicrmw.start 1907611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 1908611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1909*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v10, v1 1910*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v9, v0 1911*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10] 1912*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5] 1913*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v0, v7 1914*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v1, v8 1915*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v2, v9 1916*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v3, v10 1917*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc 1918611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 1919611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 1920*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10] 19216548b635SShilei Tian; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 19226548b635SShilei Tian; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 1923611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB14_1 1924611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 19256548b635SShilei Tian; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 1926611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 1927611212fcSMatt Arsenault; 1928611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1929611212fcSMatt Arsenault; GFX8: ; %bb.0: 1930611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1931*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s20 1932*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, v0 1933*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, v1 1934*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen 1935*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3] 19366548b635SShilei Tian; GFX8-NEXT: s_mov_b64 s[4:5], 0 1937611212fcSMatt Arsenault; GFX8-NEXT: .LBB14_1: ; %atomicrmw.start 1938611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1939611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1940*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v10, v1 1941*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v0 1942*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[0:1], v[9:10], v[9:10] 1943*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[7:8], v[0:1], v[4:5] 1944*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, v7 1945*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, v8 1946*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, v9 1947*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, v10 1948*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc 1949611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 1950611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 1951*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10] 19526548b635SShilei Tian; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 19536548b635SShilei Tian; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1954611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB14_1 1955611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 19566548b635SShilei Tian; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1957611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1958611212fcSMatt Arsenault; 1959611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: 1960611212fcSMatt Arsenault; GFX7: ; %bb.0: 1961611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19626548b635SShilei Tian; GFX7-NEXT: v_mov_b32_e32 v2, s20 19636548b635SShilei Tian; GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen glc 1964611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 1965611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 1966611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1967611212fcSMatt Arsenault %result = atomicrmw fmin ptr addrspace(7) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1968611212fcSMatt Arsenault ret double %result 1969611212fcSMatt Arsenault} 1970611212fcSMatt Arsenault 1971611212fcSMatt Arsenaultdefine void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, double %val) { 1972611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 1973611212fcSMatt Arsenault; GFX12: ; %bb.0: 1974611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1975611212fcSMatt Arsenault; GFX12-NEXT: s_wait_expcnt 0x0 1976611212fcSMatt Arsenault; GFX12-NEXT: s_wait_samplecnt 0x0 1977611212fcSMatt Arsenault; GFX12-NEXT: s_wait_bvhcnt 0x0 1978611212fcSMatt Arsenault; GFX12-NEXT: s_wait_kmcnt 0x0 1979*eeac0ffaSNikita Popov; GFX12-NEXT: v_mov_b32_e32 v6, s16 1980*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[0:1], v[0:1] 1981611212fcSMatt Arsenault; GFX12-NEXT: s_mov_b32 s4, 0 1982*eeac0ffaSNikita Popov; GFX12-NEXT: buffer_load_b64 v[2:3], v6, s[0:3], null offen 1983611212fcSMatt Arsenault; GFX12-NEXT: .LBB15_1: ; %atomicrmw.start 1984611212fcSMatt Arsenault; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 1985611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 1986*eeac0ffaSNikita Popov; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3] 1987611212fcSMatt Arsenault; GFX12-NEXT: s_wait_storecnt 0x0 1988924a64a3SPierre van Houtryve; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1989*eeac0ffaSNikita Popov; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] 1990*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2 1991*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0 1992*eeac0ffaSNikita Popov; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN 1993611212fcSMatt Arsenault; GFX12-NEXT: s_wait_loadcnt 0x0 1994611212fcSMatt Arsenault; GFX12-NEXT: global_inv scope:SCOPE_DEV 1995*eeac0ffaSNikita Popov; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3] 1996*eeac0ffaSNikita Popov; GFX12-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8 199786627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 1998611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 199986627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 2000611212fcSMatt Arsenault; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 2001611212fcSMatt Arsenault; GFX12-NEXT: s_cbranch_execnz .LBB15_1 2002611212fcSMatt Arsenault; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end 2003611212fcSMatt Arsenault; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 200486627149SCarl Ritson; GFX12-NEXT: s_wait_alu 0xfffe 2005611212fcSMatt Arsenault; GFX12-NEXT: s_setpc_b64 s[30:31] 2006611212fcSMatt Arsenault; 2007611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2008611212fcSMatt Arsenault; GFX940: ; %bb.0: 2009611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20106548b635SShilei Tian; GFX940-NEXT: v_mov_b32_e32 v2, s16 2011611212fcSMatt Arsenault; GFX940-NEXT: buffer_wbl2 sc1 201241439d5bSMatt Arsenault; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen 2013611212fcSMatt Arsenault; GFX940-NEXT: s_waitcnt vmcnt(0) 2014611212fcSMatt Arsenault; GFX940-NEXT: buffer_inv sc1 2015611212fcSMatt Arsenault; GFX940-NEXT: s_setpc_b64 s[30:31] 2016611212fcSMatt Arsenault; 2017611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2018611212fcSMatt Arsenault; GFX11: ; %bb.0: 2019611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2020*eeac0ffaSNikita Popov; GFX11-NEXT: v_mov_b32_e32 v6, s16 2021*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] 2022611212fcSMatt Arsenault; GFX11-NEXT: s_mov_b32 s4, 0 2023*eeac0ffaSNikita Popov; GFX11-NEXT: buffer_load_b64 v[2:3], v6, s[0:3], 0 offen 2024611212fcSMatt Arsenault; GFX11-NEXT: .LBB15_1: ; %atomicrmw.start 2025611212fcSMatt Arsenault; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 2026611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 2027*eeac0ffaSNikita Popov; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 2028611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2029611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2030*eeac0ffaSNikita Popov; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2031*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2 2032*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0 2033*eeac0ffaSNikita Popov; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc 2034611212fcSMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) 2035611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl1_inv 2036611212fcSMatt Arsenault; GFX11-NEXT: buffer_gl0_inv 2037*eeac0ffaSNikita Popov; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3] 2038*eeac0ffaSNikita Popov; GFX11-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8 2039611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 2040611212fcSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2041611212fcSMatt Arsenault; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 2042611212fcSMatt Arsenault; GFX11-NEXT: s_cbranch_execnz .LBB15_1 2043611212fcSMatt Arsenault; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end 2044611212fcSMatt Arsenault; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 2045611212fcSMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 2046611212fcSMatt Arsenault; 2047611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2048611212fcSMatt Arsenault; GFX10: ; %bb.0: 2049611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20506548b635SShilei Tian; GFX10-NEXT: v_mov_b32_e32 v2, s20 2051611212fcSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 20526548b635SShilei Tian; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen 205341439d5bSMatt Arsenault; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2054611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl1_inv 2055611212fcSMatt Arsenault; GFX10-NEXT: buffer_gl0_inv 2056611212fcSMatt Arsenault; GFX10-NEXT: s_setpc_b64 s[30:31] 2057611212fcSMatt Arsenault; 2058611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2059611212fcSMatt Arsenault; GFX90A: ; %bb.0: 2060611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20616548b635SShilei Tian; GFX90A-NEXT: v_mov_b32_e32 v2, s20 20626548b635SShilei Tian; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[16:19], 0 offen 2063611212fcSMatt Arsenault; GFX90A-NEXT: s_waitcnt vmcnt(0) 2064611212fcSMatt Arsenault; GFX90A-NEXT: buffer_wbinvl1 2065611212fcSMatt Arsenault; GFX90A-NEXT: s_setpc_b64 s[30:31] 2066611212fcSMatt Arsenault; 2067611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2068611212fcSMatt Arsenault; GFX908: ; %bb.0: 2069611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2070*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v6, s20 2071*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_load_dwordx2 v[2:3], v6, s[16:19], 0 offen 2072*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] 20736548b635SShilei Tian; GFX908-NEXT: s_mov_b64 s[4:5], 0 2074611212fcSMatt Arsenault; GFX908-NEXT: .LBB15_1: ; %atomicrmw.start 2075611212fcSMatt Arsenault; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 2076611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 2077*eeac0ffaSNikita Popov; GFX908-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 2078*eeac0ffaSNikita Popov; GFX908-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2079*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v10, v3 2080*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v9, v2 2081*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v8, v1 2082*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v7, v0 2083*eeac0ffaSNikita Popov; GFX908-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc 2084611212fcSMatt Arsenault; GFX908-NEXT: s_waitcnt vmcnt(0) 2085611212fcSMatt Arsenault; GFX908-NEXT: buffer_wbinvl1 2086*eeac0ffaSNikita Popov; GFX908-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3] 2087*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v2, v7 20886548b635SShilei Tian; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2089*eeac0ffaSNikita Popov; GFX908-NEXT: v_mov_b32_e32 v3, v8 20906548b635SShilei Tian; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] 2091611212fcSMatt Arsenault; GFX908-NEXT: s_cbranch_execnz .LBB15_1 2092611212fcSMatt Arsenault; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end 20936548b635SShilei Tian; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] 2094611212fcSMatt Arsenault; GFX908-NEXT: s_setpc_b64 s[30:31] 2095611212fcSMatt Arsenault; 2096611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2097611212fcSMatt Arsenault; GFX8: ; %bb.0: 2098611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2099*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s20 2100*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_load_dwordx2 v[2:3], v6, s[16:19], 0 offen 2101*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] 21026548b635SShilei Tian; GFX8-NEXT: s_mov_b64 s[4:5], 0 2103611212fcSMatt Arsenault; GFX8-NEXT: .LBB15_1: ; %atomicrmw.start 2104611212fcSMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2105611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 2106*eeac0ffaSNikita Popov; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] 2107*eeac0ffaSNikita Popov; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2108*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v10, v3 2109*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v2 2110*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v1 2111*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, v0 2112*eeac0ffaSNikita Popov; GFX8-NEXT: buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc 2113611212fcSMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 2114611212fcSMatt Arsenault; GFX8-NEXT: buffer_wbinvl1 2115*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3] 2116*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, v7 21176548b635SShilei Tian; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2118*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, v8 21196548b635SShilei Tian; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 2120611212fcSMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB15_1 2121611212fcSMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 21226548b635SShilei Tian; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 2123611212fcSMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2124611212fcSMatt Arsenault; 2125611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: 2126611212fcSMatt Arsenault; GFX7: ; %bb.0: 2127611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21286548b635SShilei Tian; GFX7-NEXT: v_mov_b32_e32 v2, s20 21296548b635SShilei Tian; GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen 2130611212fcSMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 2131611212fcSMatt Arsenault; GFX7-NEXT: buffer_wbinvl1 2132611212fcSMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2133611212fcSMatt Arsenault %unused = atomicrmw fmin ptr addrspace(7) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 2134611212fcSMatt Arsenault ret void 2135611212fcSMatt Arsenault} 2136611212fcSMatt Arsenault 2137611212fcSMatt Arsenault!0 = !{} 21381d037087SMatt Arsenault!1 = !{i32 5, i32 6} 2139