1b0a25468SMatt Arsenault; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 25a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GFX7 %s 35a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s 45a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s 5b0a25468SMatt Arsenault 6b0a25468SMatt Arsenault; --------------------------------------------------------------------- 7b0a25468SMatt Arsenault; atomicrmw xchg 8b0a25468SMatt Arsenault; --------------------------------------------------------------------- 9b0a25468SMatt Arsenault 10b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret(ptr %ptr, i64 %in) { 11b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret: 12b0a25468SMatt Arsenault; GFX7: ; %bb.0: 13b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 15b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 16b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 17b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 18b0a25468SMatt Arsenault; 19b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret: 20b0a25468SMatt Arsenault; GFX8: ; %bb.0: 21b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 23b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 24b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 25b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 26b0a25468SMatt Arsenault; 27b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret: 28b0a25468SMatt Arsenault; GFX9: ; %bb.0: 29b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 31b0a25468SMatt 
Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 32b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 33b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 34b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 35b0a25468SMatt Arsenault ret void 36b0a25468SMatt Arsenault} 37b0a25468SMatt Arsenault 38b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret_offset(ptr %out, i64 %in) { 39b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset: 40b0a25468SMatt Arsenault; GFX7: ; %bb.0: 41b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 42b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 43b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 44b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 45b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 46b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 47b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 48b0a25468SMatt Arsenault; 49b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset: 50b0a25468SMatt Arsenault; GFX8: ; %bb.0: 51b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 52b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 53b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 54b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 55b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 56b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 57b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 58b0a25468SMatt Arsenault; 59b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset: 60b0a25468SMatt Arsenault; GFX9: ; %bb.0: 61b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] offset:32 63b0a25468SMatt Arsenault; GFX9-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 64b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 65b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 66b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 67b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 68b0a25468SMatt Arsenault ret void 69b0a25468SMatt Arsenault} 70b0a25468SMatt Arsenault 71b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret(ptr %ptr, i64 %in) { 72b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret: 73b0a25468SMatt Arsenault; GFX7: ; %bb.0: 74b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 75b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 76b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 77b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 78b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 79b0a25468SMatt Arsenault; 80b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret: 81b0a25468SMatt Arsenault; GFX8: ; %bb.0: 82b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 83b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 84b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 85b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 86b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 87b0a25468SMatt Arsenault; 88b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret: 89b0a25468SMatt Arsenault; GFX9: ; %bb.0: 90b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 92b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 93b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 94b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 95b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 
96b0a25468SMatt Arsenault ret i64 %result 97b0a25468SMatt Arsenault} 98b0a25468SMatt Arsenault 99b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret_offset(ptr %out, i64 %in) { 100b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset: 101b0a25468SMatt Arsenault; GFX7: ; %bb.0: 102b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 104b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 105b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 106b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 107b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 108b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 109b0a25468SMatt Arsenault; 110b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset: 111b0a25468SMatt Arsenault; GFX8: ; %bb.0: 112b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 113b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 114b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 115b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 116b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 117b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 118b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 119b0a25468SMatt Arsenault; 120b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset: 121b0a25468SMatt Arsenault; GFX9: ; %bb.0: 122b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 123b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 124b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 125b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 126b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 127b0a25468SMatt Arsenault %gep = getelementptr i64, ptr 
%out, i64 4 128b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 129b0a25468SMatt Arsenault ret i64 %result 130b0a25468SMatt Arsenault} 131b0a25468SMatt Arsenault 132b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 133b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_scalar: 134b0a25468SMatt Arsenault; GFX7: ; %bb.0: 135b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 136b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 137b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 138b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 139b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 140b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 141b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 142b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 143b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 144b0a25468SMatt Arsenault; 145b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_scalar: 146b0a25468SMatt Arsenault; GFX8: ; %bb.0: 147b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 148b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 149b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 150b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 151b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 152b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 153b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 154b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 155b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 156b0a25468SMatt Arsenault; 157b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_scalar: 158b0a25468SMatt Arsenault; GFX9: ; %bb.0: 159b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 160b0a25468SMatt 
Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 161b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 162b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 163b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 164b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 165b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 166b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 167b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 168b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 169b0a25468SMatt Arsenault ret void 170b0a25468SMatt Arsenault} 171b0a25468SMatt Arsenault 172b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 173b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset_scalar: 174b0a25468SMatt Arsenault; GFX7: ; %bb.0: 175b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 176b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 177b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 178b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 179b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 180b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 181b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 182b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 183b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 184b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 185b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 186b0a25468SMatt Arsenault; 187b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset_scalar: 188b0a25468SMatt Arsenault; GFX8: ; %bb.0: 189b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 190b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 191b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 192b0a25468SMatt 
Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 193b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 194b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 195b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 196b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 197b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 198b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 199b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 200b0a25468SMatt Arsenault; 201b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset_scalar: 202b0a25468SMatt Arsenault; GFX9: ; %bb.0: 203b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 204b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 205b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 206b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 207b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 208b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] offset:32 209b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 210b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 211b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 212b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 213b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 214b0a25468SMatt Arsenault ret void 215b0a25468SMatt Arsenault} 216b0a25468SMatt Arsenault 217b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 218b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_scalar: 219b0a25468SMatt Arsenault; GFX7: ; %bb.0: 220b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 221b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 222b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 223b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 224b0a25468SMatt 
Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 225b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 226b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 227b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 228b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 229b0a25468SMatt Arsenault; 230b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_scalar: 231b0a25468SMatt Arsenault; GFX8: ; %bb.0: 232b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 233b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 234b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 235b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 236b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 237b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 238b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 239b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 240b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 241b0a25468SMatt Arsenault; 242b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_scalar: 243b0a25468SMatt Arsenault; GFX9: ; %bb.0: 244b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 246b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 247b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 248b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 249b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 250b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 251b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 252b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 253b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 254b0a25468SMatt Arsenault ret i64 %result 255b0a25468SMatt Arsenault} 256b0a25468SMatt Arsenault 
257b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 258b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset_scalar: 259b0a25468SMatt Arsenault; GFX7: ; %bb.0: 260b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 261b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 262b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 263b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 264b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 265b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 266b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 267b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 268b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 269b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 270b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 271b0a25468SMatt Arsenault; 272b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset_scalar: 273b0a25468SMatt Arsenault; GFX8: ; %bb.0: 274b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 275b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 276b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 277b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 278b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 279b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 280b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 281b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 282b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 283b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 284b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 285b0a25468SMatt Arsenault; 286b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset_scalar: 287b0a25468SMatt Arsenault; GFX9: ; %bb.0: 288b0a25468SMatt 
Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 289b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 290b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 291b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 292b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 293b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 294b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 295b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 296b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 297b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 298b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 299b0a25468SMatt Arsenault ret i64 %result 300b0a25468SMatt Arsenault} 301b0a25468SMatt Arsenault 302b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 303b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory: 304b0a25468SMatt Arsenault; GFX7: ; %bb.0: 305b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 306b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 307b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 308b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 309b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 310b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 311b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 312b0a25468SMatt Arsenault; 313b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory: 314b0a25468SMatt Arsenault; GFX8: ; %bb.0: 315b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 316b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 317b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 
318b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 319b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 320b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 321b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 322b0a25468SMatt Arsenault; 323b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory: 324b0a25468SMatt Arsenault; GFX9: ; %bb.0: 325b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 326b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] offset:32 327b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 328b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 329b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 330b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 331b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 332b0a25468SMatt Arsenault ret void 333b0a25468SMatt Arsenault} 334b0a25468SMatt Arsenault 335b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 336b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory: 337b0a25468SMatt Arsenault; GFX7: ; %bb.0: 338b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 339b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 340b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 341b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 342b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 343b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 344b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 345b0a25468SMatt Arsenault; 346b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory: 347b0a25468SMatt Arsenault; GFX8: ; %bb.0: 
348b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 349b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 350b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 351b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 352b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 353b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 354b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 355b0a25468SMatt Arsenault; 356b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory: 357b0a25468SMatt Arsenault; GFX9: ; %bb.0: 358b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 359b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 360b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 361b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 362b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 363b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 364b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 365b0a25468SMatt Arsenault ret i64 %result 366b0a25468SMatt Arsenault} 367b0a25468SMatt Arsenault 368b0a25468SMatt Arsenault; --------------------------------------------------------------------- 369b0a25468SMatt Arsenault; atomicrmw xchg f64 370b0a25468SMatt Arsenault; --------------------------------------------------------------------- 371b0a25468SMatt Arsenault 372b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_f64_noret(ptr %ptr, double %in) { 373b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret: 374b0a25468SMatt Arsenault; GFX7: ; %bb.0: 375b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 376b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 377b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 378b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 379b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 380b0a25468SMatt Arsenault; 381b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret: 382b0a25468SMatt Arsenault; GFX8: ; %bb.0: 383b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 384b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 385b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 386b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 387b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 388b0a25468SMatt Arsenault; 389b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret: 390b0a25468SMatt Arsenault; GFX9: ; %bb.0: 391b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 392b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 393b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 394b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 395b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 396b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1 397b0a25468SMatt Arsenault ret void 398b0a25468SMatt Arsenault} 399b0a25468SMatt Arsenault 400b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_f64_noret_offset(ptr %out, double %in) { 401b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset: 402b0a25468SMatt Arsenault; GFX7: ; %bb.0: 403b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 404b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 405b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 406b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 407b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 408b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 409b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 
410b0a25468SMatt Arsenault; 411b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset: 412b0a25468SMatt Arsenault; GFX8: ; %bb.0: 413b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 414b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 415b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 416b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 417b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 418b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 419b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 420b0a25468SMatt Arsenault; 421b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset: 422b0a25468SMatt Arsenault; GFX9: ; %bb.0: 423b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 424b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] offset:32 425b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 426b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 427b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 428b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i32 4 429b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 430b0a25468SMatt Arsenault ret void 431b0a25468SMatt Arsenault} 432b0a25468SMatt Arsenault 433b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret(ptr %ptr, double %in) { 434b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret: 435b0a25468SMatt Arsenault; GFX7: ; %bb.0: 436b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 438b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 439b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 440b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 441b0a25468SMatt Arsenault; 442b0a25468SMatt Arsenault; 
GFX8-LABEL: flat_atomic_xchg_f64_ret: 443b0a25468SMatt Arsenault; GFX8: ; %bb.0: 444b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 446b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 447b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 448b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 449b0a25468SMatt Arsenault; 450b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret: 451b0a25468SMatt Arsenault; GFX9: ; %bb.0: 452b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 453b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 454b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 455b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 456b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 457b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1 458b0a25468SMatt Arsenault ret double %result 459b0a25468SMatt Arsenault} 460b0a25468SMatt Arsenault 461b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret_offset(ptr %out, double %in) { 462b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset: 463b0a25468SMatt Arsenault; GFX7: ; %bb.0: 464b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 465b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 466b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 467b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 468b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 469b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 470b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 471b0a25468SMatt Arsenault; 472b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset: 473b0a25468SMatt Arsenault; GFX8: ; %bb.0: 474b0a25468SMatt 
Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 475b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 476b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 477b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 478b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 479b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 480b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 481b0a25468SMatt Arsenault; 482b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset: 483b0a25468SMatt Arsenault; GFX9: ; %bb.0: 484b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 485b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 486b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 487b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 488b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 489b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i32 4 490b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 491b0a25468SMatt Arsenault ret double %result 492b0a25468SMatt Arsenault} 493b0a25468SMatt Arsenault 494b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_f64_noret_scalar(ptr inreg %ptr, double inreg %in) { 495b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_scalar: 496b0a25468SMatt Arsenault; GFX7: ; %bb.0: 497b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 498b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 499b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 500b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 501b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 502b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 503b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 504b0a25468SMatt Arsenault; GFX7-NEXT: 
buffer_wbinvl1_vol 505b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 506b0a25468SMatt Arsenault; 507b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_scalar: 508b0a25468SMatt Arsenault; GFX8: ; %bb.0: 509b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 510b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 511b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 512b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 513b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 514b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 515b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 516b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 517b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 518b0a25468SMatt Arsenault; 519b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_scalar: 520b0a25468SMatt Arsenault; GFX9: ; %bb.0: 521b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 522b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 523b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 524b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 525b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 526b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 527b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 528b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 529b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 530b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1 531b0a25468SMatt Arsenault ret void 532b0a25468SMatt Arsenault} 533b0a25468SMatt Arsenault 534b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_f64_noret_offset_scalar(ptr inreg %out, double inreg %in) { 535b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset_scalar: 536b0a25468SMatt Arsenault; GFX7: ; %bb.0: 537b0a25468SMatt 
Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 538b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 539b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 540b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 541b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 542b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 543b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 544b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 545b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 546b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 547b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 548b0a25468SMatt Arsenault; 549b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset_scalar: 550b0a25468SMatt Arsenault; GFX8: ; %bb.0: 551b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 552b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 553b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 554b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 555b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 556b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 557b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 558b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] 559b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 560b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 561b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 562b0a25468SMatt Arsenault; 563b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset_scalar: 564b0a25468SMatt Arsenault; GFX9: ; %bb.0: 565b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 566b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 567b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 568b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 569b0a25468SMatt Arsenault; GFX9-NEXT: 
v_mov_b32_e32 v3, s5 570b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] offset:32 571b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 572b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 573b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 574b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i32 4 575b0a25468SMatt Arsenault %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 576b0a25468SMatt Arsenault ret void 577b0a25468SMatt Arsenault} 578b0a25468SMatt Arsenault 579b0a25468SMatt Arsenaultdefine amdgpu_gfx double @flat_atomic_xchg_f64_ret_scalar(ptr inreg %ptr, double inreg %in) { 580b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_scalar: 581b0a25468SMatt Arsenault; GFX7: ; %bb.0: 582b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 583b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 584b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 585b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 586b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 587b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 588b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 589b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 590b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 591b0a25468SMatt Arsenault; 592b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_scalar: 593b0a25468SMatt Arsenault; GFX8: ; %bb.0: 594b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 595b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 596b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 597b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 598b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 599b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 600b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 601b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 602b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 603b0a25468SMatt Arsenault; 604b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_scalar: 605b0a25468SMatt Arsenault; GFX9: ; %bb.0: 606b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 607b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 608b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 609b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 610b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 611b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 612b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 613b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 614b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 615b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1 616b0a25468SMatt Arsenault ret double %result 617b0a25468SMatt Arsenault} 618b0a25468SMatt Arsenault 619b0a25468SMatt Arsenaultdefine amdgpu_gfx double @flat_atomic_xchg_f64_ret_offset_scalar(ptr inreg %out, double inreg %in) { 620b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset_scalar: 621b0a25468SMatt Arsenault; GFX7: ; %bb.0: 622b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 623b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 624b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 625b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 626b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 627b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 628b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 629b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 630b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 631b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 632b0a25468SMatt 
Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 633b0a25468SMatt Arsenault; 634b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset_scalar: 635b0a25468SMatt Arsenault; GFX8: ; %bb.0: 636b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 637b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 638b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 639b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 640b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 641b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 642b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 643b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc 644b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 645b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 646b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 647b0a25468SMatt Arsenault; 648b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset_scalar: 649b0a25468SMatt Arsenault; GFX9: ; %bb.0: 650b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 651b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 652b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 653b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 654b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 655b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 656b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 657b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 658b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 659b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i32 4 660b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 661b0a25468SMatt Arsenault ret double %result 662b0a25468SMatt Arsenault} 663b0a25468SMatt Arsenault 664b0a25468SMatt Arsenaultdefine void 
@flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory(ptr %out, double %in) { 665b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory: 666b0a25468SMatt Arsenault; GFX7: ; %bb.0: 667b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 668b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 669b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 670b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 671b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 672b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 673b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 674b0a25468SMatt Arsenault; 675b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory: 676b0a25468SMatt Arsenault; GFX8: ; %bb.0: 677b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 679b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 680b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] 681b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 682b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 683b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 684b0a25468SMatt Arsenault; 685b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory: 686b0a25468SMatt Arsenault; GFX9: ; %bb.0: 687b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 688b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] offset:32 689b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 690b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 691b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 692b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i64 4 693b0a25468SMatt Arsenault %tmp0 = 
atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 694b0a25468SMatt Arsenault ret void 695b0a25468SMatt Arsenault} 696b0a25468SMatt Arsenault 697b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory(ptr %out, double %in) { 698b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory: 699b0a25468SMatt Arsenault; GFX7: ; %bb.0: 700b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 701b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 702b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 703b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 704b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 705b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 706b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 707b0a25468SMatt Arsenault; 708b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory: 709b0a25468SMatt Arsenault; GFX8: ; %bb.0: 710b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 711b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 712b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 713b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc 714b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 715b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 716b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 717b0a25468SMatt Arsenault; 718b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory: 719b0a25468SMatt Arsenault; GFX9: ; %bb.0: 720b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 721b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 722b0a25468SMatt Arsenault; GFX9-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 723b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 724b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 725b0a25468SMatt Arsenault %gep = getelementptr double, ptr %out, i64 4 726b0a25468SMatt Arsenault %result = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 727b0a25468SMatt Arsenault ret double %result 728b0a25468SMatt Arsenault} 729b0a25468SMatt Arsenault 730b0a25468SMatt Arsenault; --------------------------------------------------------------------- 731b0a25468SMatt Arsenault; atomicrmw add 732b0a25468SMatt Arsenault; --------------------------------------------------------------------- 733b0a25468SMatt Arsenault 734b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret(ptr %ptr, i64 %in) { 735b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret: 736b0a25468SMatt Arsenault; GFX7: ; %bb.0: 737b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 738b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 739b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 740b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 741b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 742b0a25468SMatt Arsenault; 743b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret: 744b0a25468SMatt Arsenault; GFX8: ; %bb.0: 745b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 746b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 747b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 748b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 749b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 750b0a25468SMatt Arsenault; 751b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret: 752b0a25468SMatt Arsenault; GFX9: ; %bb.0: 753b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 754b0a25468SMatt Arsenault; GFX9-NEXT: 
flat_atomic_add_x2 v[0:1], v[2:3] 755b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 756b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 757b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 758b0a25468SMatt Arsenault %tmp0 = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 759b0a25468SMatt Arsenault ret void 760b0a25468SMatt Arsenault} 761b0a25468SMatt Arsenault 762b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret_offset(ptr %out, i64 %in) { 763b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset: 764b0a25468SMatt Arsenault; GFX7: ; %bb.0: 765b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 766b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 767b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 768b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 769b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 770b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 771b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 772b0a25468SMatt Arsenault; 773b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset: 774b0a25468SMatt Arsenault; GFX8: ; %bb.0: 775b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 776b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 777b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 778b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 779b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 780b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 781b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 782b0a25468SMatt Arsenault; 783b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset: 784b0a25468SMatt Arsenault; GFX9: ; %bb.0: 785b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 786b0a25468SMatt Arsenault; GFX9-NEXT: 
flat_atomic_add_x2 v[0:1], v[2:3] offset:32 787b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 788b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 789b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 790b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 791b0a25468SMatt Arsenault %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 792b0a25468SMatt Arsenault ret void 793b0a25468SMatt Arsenault} 794b0a25468SMatt Arsenault 795b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret(ptr %ptr, i64 %in) { 796b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret: 797b0a25468SMatt Arsenault; GFX7: ; %bb.0: 798b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 799b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 800b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 801b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 802b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 803b0a25468SMatt Arsenault; 804b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret: 805b0a25468SMatt Arsenault; GFX8: ; %bb.0: 806b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 807b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 808b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 809b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 810b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 811b0a25468SMatt Arsenault; 812b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret: 813b0a25468SMatt Arsenault; GFX9: ; %bb.0: 814b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 815b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 816b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 817b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 818b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 
s[30:31] 819b0a25468SMatt Arsenault %result = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 820b0a25468SMatt Arsenault ret i64 %result 821b0a25468SMatt Arsenault} 822b0a25468SMatt Arsenault 823b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret_offset(ptr %out, i64 %in) { 824b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset: 825b0a25468SMatt Arsenault; GFX7: ; %bb.0: 826b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 827b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 828b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 829b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 830b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 831b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 832b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 833b0a25468SMatt Arsenault; 834b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset: 835b0a25468SMatt Arsenault; GFX8: ; %bb.0: 836b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 837b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 838b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 839b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 840b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 841b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 842b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 843b0a25468SMatt Arsenault; 844b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_offset: 845b0a25468SMatt Arsenault; GFX9: ; %bb.0: 846b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 847b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 848b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 849b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 
850b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 851b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 852b0a25468SMatt Arsenault %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 853b0a25468SMatt Arsenault ret i64 %result 854b0a25468SMatt Arsenault} 855b0a25468SMatt Arsenault 856b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_add_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 857b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_scalar: 858b0a25468SMatt Arsenault; GFX7: ; %bb.0: 859b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 860b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 861b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 862b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 863b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 864b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] 865b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 866b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 867b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 868b0a25468SMatt Arsenault; 869b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_scalar: 870b0a25468SMatt Arsenault; GFX8: ; %bb.0: 871b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 873b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 874b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 875b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 876b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] 877b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 878b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 879b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 880b0a25468SMatt Arsenault; 881b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_scalar: 882b0a25468SMatt Arsenault; GFX9: 
; %bb.0: 883b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 884b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 885b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 886b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 887b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 888b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] 889b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 890b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 891b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 892b0a25468SMatt Arsenault %tmp0 = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 893b0a25468SMatt Arsenault ret void 894b0a25468SMatt Arsenault} 895b0a25468SMatt Arsenault 896b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_add_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 897b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset_scalar: 898b0a25468SMatt Arsenault; GFX7: ; %bb.0: 899b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 900b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 901b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 902b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 903b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 904b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 905b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 906b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] 907b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 908b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 909b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 910b0a25468SMatt Arsenault; 911b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset_scalar: 912b0a25468SMatt Arsenault; GFX8: ; %bb.0: 913b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 914b0a25468SMatt Arsenault; GFX8-NEXT: 
s_add_u32 s34, s4, 32 915b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 916b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 917b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 918b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 919b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 920b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] 921b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 922b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 923b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 924b0a25468SMatt Arsenault; 925b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset_scalar: 926b0a25468SMatt Arsenault; GFX9: ; %bb.0: 927b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 928b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 929b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 930b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 931b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 932b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] offset:32 933b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 934b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 935b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 936b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 937b0a25468SMatt Arsenault %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 938b0a25468SMatt Arsenault ret void 939b0a25468SMatt Arsenault} 940b0a25468SMatt Arsenault 941b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_add_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 942b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_scalar: 943b0a25468SMatt Arsenault; GFX7: ; %bb.0: 944b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 945b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 946b0a25468SMatt Arsenault; GFX7-NEXT: 
v_mov_b32_e32 v1, s7 947b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 948b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 949b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc 950b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 951b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 952b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 953b0a25468SMatt Arsenault; 954b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_scalar: 955b0a25468SMatt Arsenault; GFX8: ; %bb.0: 956b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 957b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 958b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 959b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 960b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 961b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc 962b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 963b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 964b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 965b0a25468SMatt Arsenault; 966b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_scalar: 967b0a25468SMatt Arsenault; GFX9: ; %bb.0: 968b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 969b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 970b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 971b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 972b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 973b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc 974b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 975b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 976b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 977b0a25468SMatt Arsenault %result = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 978b0a25468SMatt 
Arsenault ret i64 %result 979b0a25468SMatt Arsenault} 980b0a25468SMatt Arsenault 981b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_add_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 982b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset_scalar: 983b0a25468SMatt Arsenault; GFX7: ; %bb.0: 984b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 985b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 986b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 987b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 988b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 989b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 990b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 991b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc 992b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 993b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 994b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 995b0a25468SMatt Arsenault; 996b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset_scalar: 997b0a25468SMatt Arsenault; GFX8: ; %bb.0: 998b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 999b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 1000b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 1001b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 1002b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1003b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1004b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 1005b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc 1006b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1007b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1008b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1009b0a25468SMatt Arsenault; 1010b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_add_i64_ret_offset_scalar: 1011b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1012b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1013b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1014b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1015b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1016b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1017b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 1018b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1019b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1020b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1021b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1022b0a25468SMatt Arsenault %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1023b0a25468SMatt Arsenault ret i64 %result 1024b0a25468SMatt Arsenault} 1025b0a25468SMatt Arsenault 1026b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1027b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory: 1028b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1029b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1031b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1032b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 1033b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1034b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1035b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1036b0a25468SMatt Arsenault; 1037b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory: 1038b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1039b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1040b0a25468SMatt Arsenault; 
GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1041b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1042b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] 1043b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1044b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1045b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1046b0a25468SMatt Arsenault; 1047b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory: 1048b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1049b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1050b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] offset:32 1051b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1052b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1053b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1054b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1055b0a25468SMatt Arsenault %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 1056b0a25468SMatt Arsenault ret void 1057b0a25468SMatt Arsenault} 1058b0a25468SMatt Arsenault 1059b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1060b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory: 1061b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1062b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1063b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1064b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1065b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 1066b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1067b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1068b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1069b0a25468SMatt Arsenault; 
1070b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory: 1071b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1072b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1073b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1074b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1075b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc 1076b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1077b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1078b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1079b0a25468SMatt Arsenault; 1080b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory: 1081b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1082b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1083b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 1084b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1085b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1086b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1087b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1088b0a25468SMatt Arsenault %result = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 1089b0a25468SMatt Arsenault ret i64 %result 1090b0a25468SMatt Arsenault} 1091b0a25468SMatt Arsenault 1092b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1093b0a25468SMatt Arsenault; atomicrmw sub 1094b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1095b0a25468SMatt Arsenault 1096b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret(ptr %ptr, i64 %in) { 1097b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret: 1098b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1099b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1100b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1101b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1102b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1103b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1104b0a25468SMatt Arsenault; 1105b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret: 1106b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1107b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1108b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1109b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1110b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1111b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1112b0a25468SMatt Arsenault; 1113b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret: 1114b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1115b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1116b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1117b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1118b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1119b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1120b0a25468SMatt Arsenault %tmp0 = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1121b0a25468SMatt Arsenault ret void 1122b0a25468SMatt Arsenault} 1123b0a25468SMatt Arsenault 1124b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret_offset(ptr %out, i64 %in) { 1125b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset: 1126b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1127b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1129b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1130b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1131b0a25468SMatt Arsenault; 
GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1132b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1133b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1134b0a25468SMatt Arsenault; 1135b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset: 1136b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1137b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1139b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1140b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1141b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1142b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1143b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1144b0a25468SMatt Arsenault; 1145b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset: 1146b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1147b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1148b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] offset:32 1149b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1150b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1151b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1152b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1153b0a25468SMatt Arsenault %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1154b0a25468SMatt Arsenault ret void 1155b0a25468SMatt Arsenault} 1156b0a25468SMatt Arsenault 1157b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret(ptr %ptr, i64 %in) { 1158b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret: 1159b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1160b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1161b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1162b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
1163b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1164b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1165b0a25468SMatt Arsenault; 1166b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret: 1167b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1168b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1169b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1170b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1171b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1172b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1173b0a25468SMatt Arsenault; 1174b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret: 1175b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1176b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1177b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1178b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1179b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1180b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1181b0a25468SMatt Arsenault %result = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1182b0a25468SMatt Arsenault ret i64 %result 1183b0a25468SMatt Arsenault} 1184b0a25468SMatt Arsenault 1185b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret_offset(ptr %out, i64 %in) { 1186b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset: 1187b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1188b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1189b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1190b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1191b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1192b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1193b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1194b0a25468SMatt Arsenault; GFX7-NEXT: 
s_setpc_b64 s[30:31] 1195b0a25468SMatt Arsenault; 1196b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset: 1197b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1198b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1199b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1200b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1201b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1202b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1203b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1204b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1205b0a25468SMatt Arsenault; 1206b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_offset: 1207b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1208b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1209b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 1210b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1211b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1212b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1213b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1214b0a25468SMatt Arsenault %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1215b0a25468SMatt Arsenault ret i64 %result 1216b0a25468SMatt Arsenault} 1217b0a25468SMatt Arsenault 1218b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_sub_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 1219b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_scalar: 1220b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1221b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1222b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1223b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1224b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 1225b0a25468SMatt Arsenault; GFX7-NEXT: 
v_mov_b32_e32 v3, s5 1226b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] 1227b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1228b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1229b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1230b0a25468SMatt Arsenault; 1231b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_scalar: 1232b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1233b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1234b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1235b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1236b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 1237b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 1238b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] 1239b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1240b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1241b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1242b0a25468SMatt Arsenault; 1243b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_scalar: 1244b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1245b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1246b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1247b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1248b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1249b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1250b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] 1251b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1252b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1253b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1254b0a25468SMatt Arsenault %tmp0 = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1255b0a25468SMatt Arsenault ret void 1256b0a25468SMatt Arsenault} 1257b0a25468SMatt Arsenault 1258b0a25468SMatt Arsenaultdefine amdgpu_gfx void 
@flat_atomic_sub_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 1259b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset_scalar: 1260b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1261b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1262b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 1263b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 1264b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 1265b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1266b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1267b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 1268b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] 1269b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1270b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1271b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1272b0a25468SMatt Arsenault; 1273b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset_scalar: 1274b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1275b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1276b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 1277b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 1278b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 1279b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1280b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1281b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 1282b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] 1283b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1284b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1285b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1286b0a25468SMatt Arsenault; 1287b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset_scalar: 1288b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1289b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1290b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1291b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1292b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1293b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1294b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] offset:32 1295b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1296b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1297b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1298b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1299b0a25468SMatt Arsenault %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1300b0a25468SMatt Arsenault ret void 1301b0a25468SMatt Arsenault} 1302b0a25468SMatt Arsenault 1303b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_sub_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 1304b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_scalar: 1305b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1306b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1307b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1308b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1309b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 1310b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 1311b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc 1312b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1313b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1314b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1315b0a25468SMatt Arsenault; 1316b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_scalar: 1317b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1318b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1319b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1320b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1321b0a25468SMatt 
Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 1322b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 1323b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc 1324b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1325b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1326b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1327b0a25468SMatt Arsenault; 1328b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_scalar: 1329b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1330b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1332b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1333b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1334b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1335b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc 1336b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1337b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1338b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1339b0a25468SMatt Arsenault %result = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1340b0a25468SMatt Arsenault ret i64 %result 1341b0a25468SMatt Arsenault} 1342b0a25468SMatt Arsenault 1343b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_sub_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 1344b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset_scalar: 1345b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1346b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1347b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 1348b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 1349b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 1350b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1351b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1352b0a25468SMatt Arsenault; 
GFX7-NEXT: v_mov_b32_e32 v3, s35 1353b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc 1354b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1355b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1356b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1357b0a25468SMatt Arsenault; 1358b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset_scalar: 1359b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1360b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1361b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 1362b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 1363b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 1364b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1365b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1366b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 1367b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc 1368b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1369b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1370b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1371b0a25468SMatt Arsenault; 1372b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_offset_scalar: 1373b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1374b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1376b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1377b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1378b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1379b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 1380b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1381b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1382b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1383b0a25468SMatt Arsenault %gep = getelementptr 
i64, ptr %out, i64 4 1384b0a25468SMatt Arsenault %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1385b0a25468SMatt Arsenault ret i64 %result 1386b0a25468SMatt Arsenault} 1387b0a25468SMatt Arsenault 1388b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1389b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory: 1390b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1391b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1393b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1394b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1395b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1396b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1397b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1398b0a25468SMatt Arsenault; 1399b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory: 1400b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1401b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1402b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1403b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1404b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] 1405b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1406b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1407b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1408b0a25468SMatt Arsenault; 1409b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory: 1410b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1411b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1412b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] offset:32 1413b0a25468SMatt 
Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1414b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1415b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1416b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1417b0a25468SMatt Arsenault %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 1418b0a25468SMatt Arsenault ret void 1419b0a25468SMatt Arsenault} 1420b0a25468SMatt Arsenault 1421b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1422b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory: 1423b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1424b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1425b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1426b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1427b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1428b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1429b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1430b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1431b0a25468SMatt Arsenault; 1432b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory: 1433b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1434b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1435b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1436b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1437b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc 1438b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1439b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1440b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1441b0a25468SMatt Arsenault; 1442b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory: 1443b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1444b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1445b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 1446b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1447b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1448b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1449b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1450b0a25468SMatt Arsenault %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 1451b0a25468SMatt Arsenault ret i64 %result 1452b0a25468SMatt Arsenault} 1453b0a25468SMatt Arsenault 1454b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1455b0a25468SMatt Arsenault; atomicrmw and 1456b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1457b0a25468SMatt Arsenault 1458b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret(ptr %ptr, i64 %in) { 1459b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret: 1460b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1461b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1462b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1463b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1464b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1465b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1466b0a25468SMatt Arsenault; 1467b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret: 1468b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1469b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1470b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1471b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1472b0a25468SMatt Arsenault; GFX8-NEXT: 
buffer_wbinvl1_vol 1473b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1474b0a25468SMatt Arsenault; 1475b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret: 1476b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1477b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1478b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1479b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1480b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1481b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1482b0a25468SMatt Arsenault %tmp0 = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1483b0a25468SMatt Arsenault ret void 1484b0a25468SMatt Arsenault} 1485b0a25468SMatt Arsenault 1486b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret_offset(ptr %out, i64 %in) { 1487b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset: 1488b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1489b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1491b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1492b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1493b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1494b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1495b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1496b0a25468SMatt Arsenault; 1497b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset: 1498b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1499b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1500b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1501b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1502b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1503b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1504b0a25468SMatt 
Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1505b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1506b0a25468SMatt Arsenault; 1507b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset: 1508b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1509b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] offset:32 1511b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1512b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1513b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1514b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1515b0a25468SMatt Arsenault %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1516b0a25468SMatt Arsenault ret void 1517b0a25468SMatt Arsenault} 1518b0a25468SMatt Arsenault 1519b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret(ptr %ptr, i64 %in) { 1520b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret: 1521b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1522b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1523b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1524b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1525b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1526b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1527b0a25468SMatt Arsenault; 1528b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret: 1529b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1530b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1531b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1532b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1533b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1534b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1535b0a25468SMatt Arsenault; 1536b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_and_i64_ret: 1537b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1538b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1539b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1540b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1541b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1542b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1543b0a25468SMatt Arsenault %result = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1544b0a25468SMatt Arsenault ret i64 %result 1545b0a25468SMatt Arsenault} 1546b0a25468SMatt Arsenault 1547b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret_offset(ptr %out, i64 %in) { 1548b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_offset: 1549b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1550b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1551b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1552b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1553b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1554b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1555b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1556b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1557b0a25468SMatt Arsenault; 1558b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset: 1559b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1560b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1561b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1562b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1563b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1564b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1565b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1566b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 
1567b0a25468SMatt Arsenault; 1568b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset: 1569b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1570b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1571b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 1572b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1573b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1574b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1575b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1576b0a25468SMatt Arsenault %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1577b0a25468SMatt Arsenault ret i64 %result 1578b0a25468SMatt Arsenault} 1579b0a25468SMatt Arsenault 1580b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_and_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 1581b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_scalar: 1582b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1583b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1584b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1585b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1586b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 1587b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 1588b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] 1589b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1590b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1591b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1592b0a25468SMatt Arsenault; 1593b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_scalar: 1594b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1595b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1596b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1597b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1598b0a25468SMatt Arsenault; GFX8-NEXT: 
v_mov_b32_e32 v2, s4 1599b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 1600b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] 1601b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1602b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1603b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1604b0a25468SMatt Arsenault; 1605b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_scalar: 1606b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1607b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1608b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1609b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1610b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1611b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1612b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] 1613b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1614b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1615b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1616b0a25468SMatt Arsenault %tmp0 = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1617b0a25468SMatt Arsenault ret void 1618b0a25468SMatt Arsenault} 1619b0a25468SMatt Arsenault 1620b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_and_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 1621b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset_scalar: 1622b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1623b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1624b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 1625b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 1626b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 1627b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1628b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1629b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 1630b0a25468SMatt 
Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] 1631b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1632b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1633b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1634b0a25468SMatt Arsenault; 1635b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset_scalar: 1636b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1637b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 1639b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 1640b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 1641b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1642b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1643b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 1644b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] 1645b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1646b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1647b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1648b0a25468SMatt Arsenault; 1649b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset_scalar: 1650b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1651b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1653b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1654b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1655b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1656b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] offset:32 1657b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1658b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1659b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1660b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1661b0a25468SMatt Arsenault %tmp0 = atomicrmw and ptr %gep, 
i64 %in seq_cst, !noalias.addrspace !1 1662b0a25468SMatt Arsenault ret void 1663b0a25468SMatt Arsenault} 1664b0a25468SMatt Arsenault 1665b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_and_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 1666b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_scalar: 1667b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1668b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1669b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1670b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1671b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 1672b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 1673b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc 1674b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1675b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1676b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1677b0a25468SMatt Arsenault; 1678b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_scalar: 1679b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1680b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1681b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1682b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1683b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 1684b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 1685b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc 1686b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1687b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1688b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1689b0a25468SMatt Arsenault; 1690b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_scalar: 1691b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1692b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1693b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 
1694b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1695b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1696b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1697b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc 1698b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1699b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1700b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1701b0a25468SMatt Arsenault %result = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1702b0a25468SMatt Arsenault ret i64 %result 1703b0a25468SMatt Arsenault} 1704b0a25468SMatt Arsenault 1705b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_and_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 1706b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_offset_scalar: 1707b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1708b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1709b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 1710b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 1711b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 1712b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 1713b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 1714b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 1715b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc 1716b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1717b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1718b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1719b0a25468SMatt Arsenault; 1720b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset_scalar: 1721b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1722b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1723b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 1724b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 
1725b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 1726b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 1727b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 1728b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 1729b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc 1730b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1731b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1732b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1733b0a25468SMatt Arsenault; 1734b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset_scalar: 1735b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1736b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1737b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 1738b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 1739b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 1740b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 1741b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 1742b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1743b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1744b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1745b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1746b0a25468SMatt Arsenault %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1747b0a25468SMatt Arsenault ret i64 %result 1748b0a25468SMatt Arsenault} 1749b0a25468SMatt Arsenault 1750b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1751b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory: 1752b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1753b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1754b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1755b0a25468SMatt Arsenault; 
GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1756b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1757b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1758b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1759b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1760b0a25468SMatt Arsenault; 1761b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory: 1762b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1763b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1764b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1765b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1766b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] 1767b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1768b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1769b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1770b0a25468SMatt Arsenault; 1771b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory: 1772b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1773b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1774b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] offset:32 1775b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1776b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1777b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1778b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1779b0a25468SMatt Arsenault %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 1780b0a25468SMatt Arsenault ret void 1781b0a25468SMatt Arsenault} 1782b0a25468SMatt Arsenault 1783b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 1784b0a25468SMatt Arsenault; GFX7-LABEL: 
flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory: 1785b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1786b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1787b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 1788b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1789b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1790b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1791b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1792b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1793b0a25468SMatt Arsenault; 1794b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory: 1795b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1796b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1797b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 1798b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1799b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc 1800b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1801b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1802b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1803b0a25468SMatt Arsenault; 1804b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory: 1805b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1806b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1807b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 1808b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1809b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1810b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1811b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1812b0a25468SMatt Arsenault %result = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, 
!noalias.addrspace !1 1813b0a25468SMatt Arsenault ret i64 %result 1814b0a25468SMatt Arsenault} 1815b0a25468SMatt Arsenault 1816b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1817b0a25468SMatt Arsenault; atomicrmw nand 1818b0a25468SMatt Arsenault; --------------------------------------------------------------------- 1819b0a25468SMatt Arsenault 1820b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret(ptr %ptr, i64 %in) { 1821b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret: 1822b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1823b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1824b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 4, v0 1825b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 1826b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[0:1] 1827b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[4:5] 1828b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 1829b0a25468SMatt Arsenault; GFX7-NEXT: .LBB50_1: ; %atomicrmw.start 1830b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 1831b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1832b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v4, v7, v3 1833b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v8, v6, v2 1834b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v5, v4 1835b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v4, v8 1836b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 1837b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1838b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1839b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1840b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 1841b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1842b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 1843b0a25468SMatt Arsenault; 
GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 1844b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB50_1 1845b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 1846b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 1847b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1848b0a25468SMatt Arsenault; 1849b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret: 1850b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1851b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1852b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 4, v0 1853b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 1854b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[0:1] 1855b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[4:5] 1856b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1857b0a25468SMatt Arsenault; GFX8-NEXT: .LBB50_1: ; %atomicrmw.start 1858b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1859b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1860b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v4, v7, v3 1861b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v8, v6, v2 1862b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v5, v4 1863b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v4, v8 1864b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 1865b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1866b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1867b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1868b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 1869b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1870b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 1871b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1872b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB50_1 1873b0a25468SMatt Arsenault; GFX8-NEXT: ; 
%bb.2: ; %atomicrmw.end 1874b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1875b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1876b0a25468SMatt Arsenault; 1877b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret: 1878b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1879b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1880b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] 1881b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 1882b0a25468SMatt Arsenault; GFX9-NEXT: .LBB50_1: ; %atomicrmw.start 1883b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1884b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1885b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 1886b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 1887b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v5, v4 1888b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 1889b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 1890b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1891b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1892b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1893b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 1894b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1895b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 1896b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 1897b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB50_1 1898b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 1899b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1900b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1901b0a25468SMatt Arsenault %tmp0 = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 1902b0a25468SMatt Arsenault ret void 1903b0a25468SMatt Arsenault} 1904b0a25468SMatt Arsenault 
1905b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret_offset(ptr %out, i64 %in) { 1906b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset: 1907b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1908b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1909b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 1910b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 1911b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 1912b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1913b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 1914b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 1915b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 1916b0a25468SMatt Arsenault; GFX7-NEXT: .LBB51_1: ; %atomicrmw.start 1917b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 1918b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1919b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v0, v7, v3 1920b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v1, v6, v2 1921b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v5, v0 1922b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v4, v1 1923b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 1924b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1925b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 1926b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 1927b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 1928b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1929b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 1930b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 1931b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB51_1 1932b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 1933b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, 
s[4:5] 1934b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 1935b0a25468SMatt Arsenault; 1936b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset: 1937b0a25468SMatt Arsenault; GFX8: ; %bb.0: 1938b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1939b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 1940b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 1941b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 1942b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1943b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 1944b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 1945b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 1946b0a25468SMatt Arsenault; GFX8-NEXT: .LBB51_1: ; %atomicrmw.start 1947b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 1948b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1949b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, v7, v3 1950b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v1, v6, v2 1951b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v5, v0 1952b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v4, v1 1953b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 1954b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1955b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 1956b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 1957b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 1958b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1959b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 1960b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 1961b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB51_1 1962b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 1963b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, 
s[4:5] 1964b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 1965b0a25468SMatt Arsenault; 1966b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset: 1967b0a25468SMatt Arsenault; GFX9: ; %bb.0: 1968b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1969b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 1970b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 1971b0a25468SMatt Arsenault; GFX9-NEXT: .LBB51_1: ; %atomicrmw.start 1972b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1973b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1974b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 1975b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 1976b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v5, v4 1977b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 1978b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 1979b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1980b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 1981b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 1982b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 1983b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1984b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 1985b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 1986b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB51_1 1987b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 1988b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1989b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 1990b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 1991b0a25468SMatt Arsenault %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 1992b0a25468SMatt Arsenault ret void 1993b0a25468SMatt Arsenault} 1994b0a25468SMatt Arsenault 
1995b0a25468SMatt Arsenaultdefine i64 @flat_atomic_nand_i64_ret(ptr %ptr, i64 %in) { 1996b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret: 1997b0a25468SMatt Arsenault; GFX7: ; %bb.0: 1998b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1999b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v5, vcc, 4, v0 2000b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 2001b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v4, v[0:1] 2002b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v5, v[5:6] 2003b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 2004b0a25468SMatt Arsenault; GFX7-NEXT: .LBB52_1: ; %atomicrmw.start 2005b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2006b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2007b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 2008b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 2009b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v4, v7, v3 2010b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v8, v6, v2 2011b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v5, v4 2012b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v4, v8 2013b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 2014b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2015b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2016b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2017b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2018b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 2019b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB52_1 2020b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2021b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 2022b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v4 2023b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v5 2024b0a25468SMatt Arsenault; 
GFX7-NEXT: s_setpc_b64 s[30:31] 2025b0a25468SMatt Arsenault; 2026b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret: 2027b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2028b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2029b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 2030b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 2031b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v4, v[0:1] 2032b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 2033b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 2034b0a25468SMatt Arsenault; GFX8-NEXT: .LBB52_1: ; %atomicrmw.start 2035b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2036b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2037b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 2038b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 2039b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v4, v7, v3 2040b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v8, v6, v2 2041b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v5, v4 2042b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v4, v8 2043b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 2044b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2045b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2046b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2047b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2048b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 2049b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB52_1 2050b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2051b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 2052b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 2053b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 2054b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 
2055b0a25468SMatt Arsenault; 2056b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret: 2057b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2058b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2059b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 2060b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 2061b0a25468SMatt Arsenault; GFX9-NEXT: .LBB52_1: ; %atomicrmw.start 2062b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2063b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2064b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 2065b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 2066b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 2067b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 2068b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v5, v4 2069b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 2070b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 2071b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2072b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2073b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2074b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2075b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 2076b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB52_1 2077b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2078b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 2079b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 2080b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 2081b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2082b0a25468SMatt Arsenault %result = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2083b0a25468SMatt Arsenault ret i64 %result 2084b0a25468SMatt Arsenault} 2085b0a25468SMatt Arsenault 2086b0a25468SMatt Arsenaultdefine 
i64 @flat_atomic_nand_i64_ret_offset(ptr %out, i64 %in) { 2087b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset: 2088b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2089b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2090b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 2091b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 2092b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 2093b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2094b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 2095b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 2096b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 2097b0a25468SMatt Arsenault; GFX7-NEXT: .LBB53_1: ; %atomicrmw.start 2098b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2099b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2100b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 2101b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 2102b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v0, v9, v3 2103b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v1, v8, v2 2104b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v7, v0 2105b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v6, v1 2106b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 2107b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2108b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2109b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 2110b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2111b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 2112b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB53_1 2113b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2114b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 2115b0a25468SMatt Arsenault; 
GFX7-NEXT: s_setpc_b64 s[30:31] 2116b0a25468SMatt Arsenault; 2117b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset: 2118b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2119b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2120b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 2121b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 2122b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 2123b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2124b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 2125b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 2126b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 2127b0a25468SMatt Arsenault; GFX8-NEXT: .LBB53_1: ; %atomicrmw.start 2128b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2129b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2130b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 2131b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 2132b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, v9, v3 2133b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v1, v8, v2 2134b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v7, v0 2135b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v6, v1 2136b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 2137b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2138b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2139b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 2140b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2141b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 2142b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB53_1 2143b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2144b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 2145b0a25468SMatt Arsenault; 
GFX8-NEXT: s_setpc_b64 s[30:31] 2146b0a25468SMatt Arsenault; 2147b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset: 2148b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2149b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2150b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 2151b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 2152b0a25468SMatt Arsenault; GFX9-NEXT: .LBB53_1: ; %atomicrmw.start 2153b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2154b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2155b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 2156b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 2157b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 2158b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 2159b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v5, v4 2160b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 2161b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 2162b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2163b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2164b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2165b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2166b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 2167b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB53_1 2168b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2169b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 2170b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 2171b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 2172b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2173b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2174b0a25468SMatt Arsenault %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 
2175b0a25468SMatt Arsenault ret i64 %result 2176b0a25468SMatt Arsenault} 2177b0a25468SMatt Arsenault 2178b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_nand_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 2179b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_scalar: 2180b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2181b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2182b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 2183b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 2184b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 2185b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 2186b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s34 2187b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s35 2188b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[0:1] 2189b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[3:4] 2190*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s4 2191b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 2192*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s5 2193b0a25468SMatt Arsenault; GFX7-NEXT: .LBB54_1: ; %atomicrmw.start 2194b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2195b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2196b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v0, s7, v3 2197b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v6, s6, v2 2198b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v1, v0 2199b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v0, v6 2200b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 2201b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2202b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2203b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 2204b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 2205b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, 
s[34:35] 2206b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 2207b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 2208b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB54_1 2209b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2210b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 2211b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2212b0a25468SMatt Arsenault; 2213b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_scalar: 2214b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2215b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2216b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 2217b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 2218b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 2219b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 2220b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s34 2221b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s35 2222b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[0:1] 2223b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[3:4] 2224*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s4 2225b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 2226*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s5 2227b0a25468SMatt Arsenault; GFX8-NEXT: .LBB54_1: ; %atomicrmw.start 2228b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2229b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2230b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, s7, v3 2231b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v6, s6, v2 2232b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v1, v0 2233b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v0, v6 2234b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 2235b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2236b0a25468SMatt Arsenault; GFX8-NEXT: 
buffer_wbinvl1_vol 2237b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 2238b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 2239b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2240b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 2241b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 2242b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB54_1 2243b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2244b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 2245b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2246b0a25468SMatt Arsenault; 2247b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_scalar: 2248b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2249b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2250b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 2251b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 2252b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 2253*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 2254b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 2255*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 2256b0a25468SMatt Arsenault; GFX9-NEXT: .LBB54_1: ; %atomicrmw.start 2257b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2258b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2259b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v0, s7, v3 2260b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v6, s6, v2 2261b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v1, v0 2262b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v0, v6 2263b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 2264b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2265b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2266b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 
2267b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 2268b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2269b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 2270b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 2271b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB54_1 2272b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2273b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 2274b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2275b0a25468SMatt Arsenault %tmp0 = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2276b0a25468SMatt Arsenault ret void 2277b0a25468SMatt Arsenault} 2278b0a25468SMatt Arsenault 2279b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_nand_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 2280b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset_scalar: 2281b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2282b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2283b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 2284b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 2285b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 2286b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 2287b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 2288b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 2289b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s34 2290b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v5, s35 2291b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[0:1] 2292b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[4:5] 2293*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 2294b0a25468SMatt Arsenault; GFX7-NEXT: .LBB55_1: ; %atomicrmw.start 2295b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2296b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2297b0a25468SMatt Arsenault; 
GFX7-NEXT: v_and_b32_e32 v0, s7, v3 2298b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v6, s6, v2 2299b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v1, v0 2300b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v0, v6 2301b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 2302b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2303b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2304b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 2305b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 2306*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2307b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 2308*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 2309b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB55_1 2310b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2311*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 2312b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2313b0a25468SMatt Arsenault; 2314b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset_scalar: 2315b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2316b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2317b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 2318b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 2319b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 2320b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 2321b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 2322b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 2323b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s34 2324b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v5, s35 2325b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 2326b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[4:5] 2327*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 2328b0a25468SMatt 
Arsenault; GFX8-NEXT: .LBB55_1: ; %atomicrmw.start 2329b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2330b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2331b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, s7, v3 2332b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v6, s6, v2 2333b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v1, v0 2334b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v0, v6 2335b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 2336b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2337b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2338b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 2339b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 2340*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2341b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 2342*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 2343b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB55_1 2344b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2345*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 2346b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2347b0a25468SMatt Arsenault; 2348b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset_scalar: 2349b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2350b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2351b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 2352b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 2353b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 2354*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 2355b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 2356*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 2357b0a25468SMatt Arsenault; GFX9-NEXT: .LBB55_1: ; %atomicrmw.start 2358b0a25468SMatt Arsenault; 
GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2359b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2360b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v0, s7, v3 2361b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v6, s6, v2 2362b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v1, v0 2363b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v0, v6 2364b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 2365b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2366b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2367b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 2368b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 2369b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2370b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 2371b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 2372b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB55_1 2373b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2374b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 2375b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2376b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2377b0a25468SMatt Arsenault %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 2378b0a25468SMatt Arsenault ret void 2379b0a25468SMatt Arsenault} 2380b0a25468SMatt Arsenault 2381b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_nand_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 2382b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_scalar: 2383b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2384b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2385b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 2386b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 2387b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 2388b0a25468SMatt Arsenault; 
GFX7-NEXT: s_addc_u32 s35, s5, 0 2389b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 2390b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 2391b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[0:1] 2392b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[2:3] 2393*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v2, s4 2394b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 2395*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v3, s5 2396b0a25468SMatt Arsenault; GFX7-NEXT: .LBB56_1: ; %atomicrmw.start 2397b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2398b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2399*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, v1 2400*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, v0 2401*eeac0ffaSNikita Popov; GFX7-NEXT: v_and_b32_e32 v0, s7, v7 2402*eeac0ffaSNikita Popov; GFX7-NEXT: v_and_b32_e32 v1, s6, v6 2403*eeac0ffaSNikita Popov; GFX7-NEXT: v_not_b32_e32 v5, v0 2404*eeac0ffaSNikita Popov; GFX7-NEXT: v_not_b32_e32 v4, v1 2405*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc 2406b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2407b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2408*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2409b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2410b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 2411b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB56_1 2412b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2413b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 2414b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2415b0a25468SMatt Arsenault; 2416b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_scalar: 2417b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2418b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2419b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 2420b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 2421b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 2422b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 2423b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 2424b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 2425b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[0:1] 2426b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[2:3] 2427*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s4 2428b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 2429*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, s5 2430b0a25468SMatt Arsenault; GFX8-NEXT: .LBB56_1: ; %atomicrmw.start 2431b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2432b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2433*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, v1 2434*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, v0 2435*eeac0ffaSNikita Popov; GFX8-NEXT: v_and_b32_e32 v0, s7, v7 2436*eeac0ffaSNikita Popov; GFX8-NEXT: v_and_b32_e32 v1, s6, v6 2437*eeac0ffaSNikita Popov; GFX8-NEXT: v_not_b32_e32 v5, v0 2438*eeac0ffaSNikita Popov; GFX8-NEXT: v_not_b32_e32 v4, v1 2439*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc 2440b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2441b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2442*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2443b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2444b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 2445b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB56_1 2446b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2447b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 2448b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2449b0a25468SMatt Arsenault; 
2450b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_scalar: 2451b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2452b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2453b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 2454b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 2455b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 2456*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 2457b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 2458*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 2459b0a25468SMatt Arsenault; GFX9-NEXT: .LBB56_1: ; %atomicrmw.start 2460b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2461b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2462*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, v1 2463*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, v0 2464*eeac0ffaSNikita Popov; GFX9-NEXT: v_and_b32_e32 v0, s7, v7 2465*eeac0ffaSNikita Popov; GFX9-NEXT: v_and_b32_e32 v1, s6, v6 2466*eeac0ffaSNikita Popov; GFX9-NEXT: v_not_b32_e32 v5, v0 2467*eeac0ffaSNikita Popov; GFX9-NEXT: v_not_b32_e32 v4, v1 2468*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc 2469b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2470b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2471*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2472b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2473b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 2474b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB56_1 2475b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2476b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 2477b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2478b0a25468SMatt Arsenault %result = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2479b0a25468SMatt Arsenault ret i64 
%result 2480b0a25468SMatt Arsenault} 2481b0a25468SMatt Arsenault 2482b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_nand_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 2483b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset_scalar: 2484b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2485b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2486b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 2487b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 2488b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 2489b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 2490b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 2491b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 2492b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 2493b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 2494b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 2495b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[2:3] 2496*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 2497b0a25468SMatt Arsenault; GFX7-NEXT: .LBB57_1: ; %atomicrmw.start 2498b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2499b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2500*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, v1 2501*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, v0 2502*eeac0ffaSNikita Popov; GFX7-NEXT: v_and_b32_e32 v0, s7, v7 2503*eeac0ffaSNikita Popov; GFX7-NEXT: v_and_b32_e32 v1, s6, v6 2504*eeac0ffaSNikita Popov; GFX7-NEXT: v_not_b32_e32 v5, v0 2505*eeac0ffaSNikita Popov; GFX7-NEXT: v_not_b32_e32 v4, v1 2506*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc 2507b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2508b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2509*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2510*eeac0ffaSNikita Popov; GFX7-NEXT: 
s_or_b64 s[34:35], vcc, s[34:35] 2511*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 2512b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB57_1 2513b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2514*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 2515b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2516b0a25468SMatt Arsenault; 2517b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset_scalar: 2518b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2519b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2520b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 2521b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 2522b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 2523b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 2524b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 2525b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 2526b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 2527b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 2528b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 2529b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[2:3] 2530*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 2531b0a25468SMatt Arsenault; GFX8-NEXT: .LBB57_1: ; %atomicrmw.start 2532b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2533b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2534*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, v1 2535*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, v0 2536*eeac0ffaSNikita Popov; GFX8-NEXT: v_and_b32_e32 v0, s7, v7 2537*eeac0ffaSNikita Popov; GFX8-NEXT: v_and_b32_e32 v1, s6, v6 2538*eeac0ffaSNikita Popov; GFX8-NEXT: v_not_b32_e32 v5, v0 2539*eeac0ffaSNikita Popov; GFX8-NEXT: v_not_b32_e32 v4, v1 2540*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc 2541b0a25468SMatt Arsenault; 
GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2542b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2543*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2544*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2545*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 2546b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB57_1 2547b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2548*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 2549b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2550b0a25468SMatt Arsenault; 2551b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset_scalar: 2552b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2553b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2554b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 2555b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 2556b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] offset:32 2557*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 2558b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 2559*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 2560b0a25468SMatt Arsenault; GFX9-NEXT: .LBB57_1: ; %atomicrmw.start 2561b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2562b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2563*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, v1 2564*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, v0 2565*eeac0ffaSNikita Popov; GFX9-NEXT: v_and_b32_e32 v0, s7, v7 2566*eeac0ffaSNikita Popov; GFX9-NEXT: v_and_b32_e32 v1, s6, v6 2567*eeac0ffaSNikita Popov; GFX9-NEXT: v_not_b32_e32 v5, v0 2568*eeac0ffaSNikita Popov; GFX9-NEXT: v_not_b32_e32 v4, v1 2569*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] offset:32 glc 2570b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2571b0a25468SMatt Arsenault; GFX9-NEXT: 
buffer_wbinvl1_vol 2572*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2573b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 2574b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 2575b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB57_1 2576b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2577b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 2578b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2579b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2580b0a25468SMatt Arsenault %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 2581b0a25468SMatt Arsenault ret i64 %result 2582b0a25468SMatt Arsenault} 2583b0a25468SMatt Arsenault 2584b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 2585b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory: 2586b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2587b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2588b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 2589b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 2590b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 2591b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2592b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 2593b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 2594b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 2595b0a25468SMatt Arsenault; GFX7-NEXT: .LBB58_1: ; %atomicrmw.start 2596b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2597b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2598b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v0, v7, v3 2599b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v1, v6, v2 2600b0a25468SMatt Arsenault; GFX7-NEXT: 
v_not_b32_e32 v5, v0 2601b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v4, v1 2602b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 2603b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2604b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2605b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2606b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 2607b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2608b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 2609b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 2610b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB58_1 2611b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2612b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 2613b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2614b0a25468SMatt Arsenault; 2615b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory: 2616b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2617b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2618b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 2619b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 2620b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 2621b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2622b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 2623b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 2624b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 2625b0a25468SMatt Arsenault; GFX8-NEXT: .LBB58_1: ; %atomicrmw.start 2626b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2627b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2628b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, v7, v3 2629b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v1, v6, v2 
2630b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v5, v0 2631b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v4, v1 2632b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 2633b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2634b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2635b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 2636b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 2637b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2638b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 2639b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 2640b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB58_1 2641b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2642b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 2643b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2644b0a25468SMatt Arsenault; 2645b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory: 2646b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2647b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2648b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 2649b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 2650b0a25468SMatt Arsenault; GFX9-NEXT: .LBB58_1: ; %atomicrmw.start 2651b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2652b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2653b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 2654b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 2655b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v5, v4 2656b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 2657b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 2658b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2659b0a25468SMatt 
Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2660b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2661b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 2662b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2663b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 2664b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 2665b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB58_1 2666b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2667b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 2668b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2669b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2670b0a25468SMatt Arsenault %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 2671b0a25468SMatt Arsenault ret void 2672b0a25468SMatt Arsenault} 2673b0a25468SMatt Arsenault 2674b0a25468SMatt Arsenaultdefine i64 @flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 2675b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory: 2676b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2677b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2678b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 2679b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 2680b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 2681b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2682b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 2683b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 2684b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 2685b0a25468SMatt Arsenault; GFX7-NEXT: .LBB59_1: ; %atomicrmw.start 2686b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 2687b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2688b0a25468SMatt 
Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 2689b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 2690b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v0, v9, v3 2691b0a25468SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v1, v8, v2 2692b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v7, v0 2693b0a25468SMatt Arsenault; GFX7-NEXT: v_not_b32_e32 v6, v1 2694b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 2695b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2696b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2697b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 2698b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2699b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 2700b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB59_1 2701b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 2702b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 2703b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2704b0a25468SMatt Arsenault; 2705b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory: 2706b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2707b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2708b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 2709b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 2710b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 2711b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2712b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 2713b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 2714b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 2715b0a25468SMatt Arsenault; GFX8-NEXT: .LBB59_1: ; %atomicrmw.start 2716b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 2717b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 2718b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 2719b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 2720b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v0, v9, v3 2721b0a25468SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v1, v8, v2 2722b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v7, v0 2723b0a25468SMatt Arsenault; GFX8-NEXT: v_not_b32_e32 v6, v1 2724b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 2725b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2726b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2727b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 2728b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2729b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 2730b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB59_1 2731b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 2732b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 2733b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2734b0a25468SMatt Arsenault; 2735b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory: 2736b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2737b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2738b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 2739b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 2740b0a25468SMatt Arsenault; GFX9-NEXT: .LBB59_1: ; %atomicrmw.start 2741b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2742b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2743b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 2744b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 2745b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v4, v7, v3 2746b0a25468SMatt Arsenault; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 2747b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 
v5, v4 2748b0a25468SMatt Arsenault; GFX9-NEXT: v_not_b32_e32 v4, v8 2749b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 2750b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2751b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2752b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 2753b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2754b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 2755b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB59_1 2756b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 2757b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 2758b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 2759b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 2760b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2761b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2762b0a25468SMatt Arsenault %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 2763b0a25468SMatt Arsenault ret i64 %result 2764b0a25468SMatt Arsenault} 2765b0a25468SMatt Arsenault 2766b0a25468SMatt Arsenault; --------------------------------------------------------------------- 2767b0a25468SMatt Arsenault; atomicrmw or 2768b0a25468SMatt Arsenault; --------------------------------------------------------------------- 2769b0a25468SMatt Arsenault 2770b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret(ptr %ptr, i64 %in) { 2771b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret: 2772b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2773b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2774b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 2775b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2776b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2777b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 
s[30:31] 2778b0a25468SMatt Arsenault; 2779b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret: 2780b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2781b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2782b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 2783b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2784b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2785b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2786b0a25468SMatt Arsenault; 2787b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret: 2788b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2789b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2790b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 2791b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2792b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2793b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2794b0a25468SMatt Arsenault %tmp0 = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2795b0a25468SMatt Arsenault ret void 2796b0a25468SMatt Arsenault} 2797b0a25468SMatt Arsenault 2798b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret_offset(ptr %out, i64 %in) { 2799b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset: 2800b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2801b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2802b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 2803b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2804b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 2805b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2806b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2807b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2808b0a25468SMatt Arsenault; 2809b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset: 2810b0a25468SMatt Arsenault; 
GFX8: ; %bb.0: 2811b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2812b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 2813b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2814b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 2815b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2816b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2817b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2818b0a25468SMatt Arsenault; 2819b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset: 2820b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2821b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2822b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] offset:32 2823b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2824b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2825b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2826b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2827b0a25468SMatt Arsenault %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 2828b0a25468SMatt Arsenault ret void 2829b0a25468SMatt Arsenault} 2830b0a25468SMatt Arsenault 2831b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret(ptr %ptr, i64 %in) { 2832b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret: 2833b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2834b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2835b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 2836b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2837b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2838b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2839b0a25468SMatt Arsenault; 2840b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret: 2841b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2842b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2843b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 2844b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2845b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2846b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2847b0a25468SMatt Arsenault; 2848b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret: 2849b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2850b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2851b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 2852b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2853b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2854b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2855b0a25468SMatt Arsenault %result = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2856b0a25468SMatt Arsenault ret i64 %result 2857b0a25468SMatt Arsenault} 2858b0a25468SMatt Arsenault 2859b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret_offset(ptr %out, i64 %in) { 2860b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset: 2861b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2862b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2863b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 2864b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2865b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 2866b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2867b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2868b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2869b0a25468SMatt Arsenault; 2870b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset: 2871b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2872b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2873b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 
2874b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2875b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 2876b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2877b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2878b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2879b0a25468SMatt Arsenault; 2880b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset: 2881b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2882b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2883b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 2884b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2885b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2886b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2887b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2888b0a25468SMatt Arsenault %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 2889b0a25468SMatt Arsenault ret i64 %result 2890b0a25468SMatt Arsenault} 2891b0a25468SMatt Arsenault 2892b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_or_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 2893b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_scalar: 2894b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2895b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2896b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 2897b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 2898b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 2899b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 2900b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 2901b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2902b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2903b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2904b0a25468SMatt Arsenault; 2905b0a25468SMatt 
Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_scalar: 2906b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2907b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2908b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 2909b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 2910b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 2911b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 2912b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 2913b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2914b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2915b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2916b0a25468SMatt Arsenault; 2917b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_scalar: 2918b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2919b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2920b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 2921b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 2922b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 2923b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 2924b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 2925b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2926b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2927b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2928b0a25468SMatt Arsenault %tmp0 = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 2929b0a25468SMatt Arsenault ret void 2930b0a25468SMatt Arsenault} 2931b0a25468SMatt Arsenault 2932b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_or_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 2933b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset_scalar: 2934b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2935b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2936b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 
32 2937b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 2938b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 2939b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 2940b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 2941b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 2942b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 2943b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2944b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2945b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2946b0a25468SMatt Arsenault; 2947b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset_scalar: 2948b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2949b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2950b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 2951b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 2952b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 2953b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 2954b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 2955b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 2956b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 2957b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2958b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 2959b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 2960b0a25468SMatt Arsenault; 2961b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset_scalar: 2962b0a25468SMatt Arsenault; GFX9: ; %bb.0: 2963b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2964b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 2965b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 2966b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 2967b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 2968b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] 
offset:32 2969b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2970b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 2971b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 2972b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 2973b0a25468SMatt Arsenault %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 2974b0a25468SMatt Arsenault ret void 2975b0a25468SMatt Arsenault} 2976b0a25468SMatt Arsenault 2977b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_or_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 2978b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_scalar: 2979b0a25468SMatt Arsenault; GFX7: ; %bb.0: 2980b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2981b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 2982b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 2983b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 2984b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 2985b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc 2986b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2987b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 2988b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 2989b0a25468SMatt Arsenault; 2990b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_scalar: 2991b0a25468SMatt Arsenault; GFX8: ; %bb.0: 2992b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2993b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 2994b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 2995b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 2996b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 2997b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc 2998b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2999b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3000b0a25468SMatt 
Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3001b0a25468SMatt Arsenault; 3002b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_scalar: 3003b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3004b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3005b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3006b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3007b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3008b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3009b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc 3010b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3011b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3012b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3013b0a25468SMatt Arsenault %result = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3014b0a25468SMatt Arsenault ret i64 %result 3015b0a25468SMatt Arsenault} 3016b0a25468SMatt Arsenault 3017b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_or_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 3018b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset_scalar: 3019b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3020b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3021b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 3022b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 3023b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 3024b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 3025b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 3026b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 3027b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc 3028b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3029b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3030b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3031b0a25468SMatt Arsenault; 
3032b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset_scalar: 3033b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3034b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3035b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 3036b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 3037b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 3038b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 3039b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 3040b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 3041b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc 3042b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3043b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3044b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3045b0a25468SMatt Arsenault; 3046b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset_scalar: 3047b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3048b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3049b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3050b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3051b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3052b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3053b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 3054b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3055b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3056b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3057b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3058b0a25468SMatt Arsenault %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3059b0a25468SMatt Arsenault ret i64 %result 3060b0a25468SMatt Arsenault} 3061b0a25468SMatt Arsenault 3062b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 
3063b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory: 3064b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3065b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3066b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3067b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3068b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 3069b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3070b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3071b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3072b0a25468SMatt Arsenault; 3073b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory: 3074b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3075b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3076b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3077b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3078b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] 3079b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3080b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3081b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3082b0a25468SMatt Arsenault; 3083b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory: 3084b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3085b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3086b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] offset:32 3087b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3088b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3089b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3090b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3091b0a25468SMatt Arsenault %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, 
!noalias.addrspace !1 3092b0a25468SMatt Arsenault ret void 3093b0a25468SMatt Arsenault} 3094b0a25468SMatt Arsenault 3095b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 3096b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory: 3097b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3098b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3099b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3100b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3101b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 3102b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3103b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3104b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3105b0a25468SMatt Arsenault; 3106b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory: 3107b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3108b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3109b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3110b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3111b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc 3112b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3113b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3114b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3115b0a25468SMatt Arsenault; 3116b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory: 3117b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3118b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3119b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 3120b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3121b0a25468SMatt Arsenault; GFX9-NEXT: 
buffer_wbinvl1_vol 3122b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3123b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3124b0a25468SMatt Arsenault %result = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 3125b0a25468SMatt Arsenault ret i64 %result 3126b0a25468SMatt Arsenault} 3127b0a25468SMatt Arsenault 3128b0a25468SMatt Arsenault; --------------------------------------------------------------------- 3129b0a25468SMatt Arsenault; atomicrmw xor 3130b0a25468SMatt Arsenault; --------------------------------------------------------------------- 3131b0a25468SMatt Arsenault 3132b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret(ptr %ptr, i64 %in) { 3133b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret: 3134b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3135b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3136b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3137b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3138b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3139b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3140b0a25468SMatt Arsenault; 3141b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret: 3142b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3143b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3144b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3145b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3146b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3147b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3148b0a25468SMatt Arsenault; 3149b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret: 3150b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3151b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3152b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3153b0a25468SMatt 
Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3154b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3155b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3156b0a25468SMatt Arsenault %tmp0 = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3157b0a25468SMatt Arsenault ret void 3158b0a25468SMatt Arsenault} 3159b0a25468SMatt Arsenault 3160b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret_offset(ptr %out, i64 %in) { 3161b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset: 3162b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3163b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3164b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3165b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3166b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3167b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3168b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3169b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3170b0a25468SMatt Arsenault; 3171b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_offset: 3172b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3173b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3174b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3175b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3176b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3177b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3178b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3179b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3180b0a25468SMatt Arsenault; 3181b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset: 3182b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3183b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3184b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], 
v[2:3] offset:32 3185b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3186b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3187b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3188b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3189b0a25468SMatt Arsenault %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3190b0a25468SMatt Arsenault ret void 3191b0a25468SMatt Arsenault} 3192b0a25468SMatt Arsenault 3193b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret(ptr %ptr, i64 %in) { 3194b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret: 3195b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3196b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3197b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3198b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3199b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3200b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3201b0a25468SMatt Arsenault; 3202b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret: 3203b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3204b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3205b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3206b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3207b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3208b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3209b0a25468SMatt Arsenault; 3210b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret: 3211b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3212b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3213b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3214b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3215b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3216b0a25468SMatt Arsenault; GFX9-NEXT: 
s_setpc_b64 s[30:31] 3217b0a25468SMatt Arsenault %result = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3218b0a25468SMatt Arsenault ret i64 %result 3219b0a25468SMatt Arsenault} 3220b0a25468SMatt Arsenault 3221b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret_offset(ptr %out, i64 %in) { 3222b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset: 3223b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3224b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3225b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3226b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3227b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3228b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3229b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3230b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3231b0a25468SMatt Arsenault; 3232b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset: 3233b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3234b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3235b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3236b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3237b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3238b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3239b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3240b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3241b0a25468SMatt Arsenault; 3242b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset: 3243b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3244b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3245b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 3246b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3247b0a25468SMatt Arsenault; 
GFX9-NEXT: buffer_wbinvl1_vol 3248b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3249b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3250b0a25468SMatt Arsenault %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3251b0a25468SMatt Arsenault ret i64 %result 3252b0a25468SMatt Arsenault} 3253b0a25468SMatt Arsenault 3254b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xor_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 3255b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_scalar: 3256b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3257b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3258b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 3259b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 3260b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 3261b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 3262b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] 3263b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3264b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3265b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3266b0a25468SMatt Arsenault; 3267b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_scalar: 3268b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3269b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3270b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 3271b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 3272b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 3273b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 3274b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] 3275b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3276b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3277b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3278b0a25468SMatt Arsenault; 3279b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_xor_i64_noret_scalar: 3280b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3281b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3282b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3283b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3284b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3285b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3286b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] 3287b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3288b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3289b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3290b0a25468SMatt Arsenault %tmp0 = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3291b0a25468SMatt Arsenault ret void 3292b0a25468SMatt Arsenault} 3293b0a25468SMatt Arsenault 3294b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xor_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 3295b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset_scalar: 3296b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3297b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3298b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 3299b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 3300b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 3301b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 3302b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 3303b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 3304b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] 3305b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3306b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3307b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3308b0a25468SMatt Arsenault; 3309b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_offset_scalar: 3310b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3311b0a25468SMatt 
Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3312b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 3313b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 3314b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 3315b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 3316b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 3317b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 3318b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] 3319b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3320b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3321b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3322b0a25468SMatt Arsenault; 3323b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset_scalar: 3324b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3325b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3326b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3327b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3328b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3329b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3330b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] offset:32 3331b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3332b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3333b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3334b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3335b0a25468SMatt Arsenault %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3336b0a25468SMatt Arsenault ret void 3337b0a25468SMatt Arsenault} 3338b0a25468SMatt Arsenault 3339b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xor_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 3340b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_scalar: 3341b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3342b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3343b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 3344b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 3345b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 3346b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 3347b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc 3348b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3349b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3350b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3351b0a25468SMatt Arsenault; 3352b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_scalar: 3353b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3354b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3355b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 3356b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 3357b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 3358b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 3359b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc 3360b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3361b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3362b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3363b0a25468SMatt Arsenault; 3364b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_scalar: 3365b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3366b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3367b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3368b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3369b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3370b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3371b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc 3372b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3373b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3374b0a25468SMatt 
Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3375b0a25468SMatt Arsenault %result = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3376b0a25468SMatt Arsenault ret i64 %result 3377b0a25468SMatt Arsenault} 3378b0a25468SMatt Arsenault 3379b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xor_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 3380b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset_scalar: 3381b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3382b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3383b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 3384b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 3385b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 3386b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 3387b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 3388b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 3389b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc 3390b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3391b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3392b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3393b0a25468SMatt Arsenault; 3394b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset_scalar: 3395b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3396b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3397b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 3398b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 3399b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 3400b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 3401b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 3402b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 3403b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc 3404b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3405b0a25468SMatt 
Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3406b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3407b0a25468SMatt Arsenault; 3408b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset_scalar: 3409b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3410b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3411b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 3412b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 3413b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 3414b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 3415b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 3416b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3417b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3418b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3419b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3420b0a25468SMatt Arsenault %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3421b0a25468SMatt Arsenault ret i64 %result 3422b0a25468SMatt Arsenault} 3423b0a25468SMatt Arsenault 3424b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 3425b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory: 3426b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3427b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3428b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3429b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3430b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3431b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3432b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3433b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3434b0a25468SMatt Arsenault; 3435b0a25468SMatt Arsenault; GFX8-LABEL: 
flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory: 3436b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3437b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3438b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3439b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3440b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] 3441b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3442b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3443b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3444b0a25468SMatt Arsenault; 3445b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory: 3446b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3447b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3448b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] offset:32 3449b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3450b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3451b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3452b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3453b0a25468SMatt Arsenault %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 3454b0a25468SMatt Arsenault ret void 3455b0a25468SMatt Arsenault} 3456b0a25468SMatt Arsenault 3457b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 3458b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory: 3459b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3460b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3461b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 3462b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3463b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3464b0a25468SMatt 
Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3465b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3466b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3467b0a25468SMatt Arsenault; 3468b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory: 3469b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3470b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3471b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 3472b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3473b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc 3474b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3475b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3476b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3477b0a25468SMatt Arsenault; 3478b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory: 3479b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3480b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3481b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 3482b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3483b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3484b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3485b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3486b0a25468SMatt Arsenault %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 3487b0a25468SMatt Arsenault ret i64 %result 3488b0a25468SMatt Arsenault} 3489b0a25468SMatt Arsenault 3490b0a25468SMatt Arsenault; --------------------------------------------------------------------- 3491b0a25468SMatt Arsenault; atomicrmw max 3492b0a25468SMatt Arsenault; --------------------------------------------------------------------- 3493b0a25468SMatt Arsenault 3494b0a25468SMatt Arsenaultdefine void 
@flat_atomic_max_i64_noret(ptr %ptr, i64 %in) { 3495b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret: 3496b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3497b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3498b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 4, v0 3499b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 3500b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[0:1] 3501b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[4:5] 3502b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 3503b0a25468SMatt Arsenault; GFX7-NEXT: .LBB80_1: ; %atomicrmw.start 3504b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3505b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3506b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3507b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3508b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3509b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3510b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3511b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3512b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3513b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 3514b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3515b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 3516b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 3517b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB80_1 3518b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3519b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 3520b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3521b0a25468SMatt Arsenault; 3522b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret: 3523b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3524b0a25468SMatt 
Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3525b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 4, v0 3526b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 3527b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[0:1] 3528b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[4:5] 3529b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 3530b0a25468SMatt Arsenault; GFX8-NEXT: .LBB80_1: ; %atomicrmw.start 3531b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3532b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3533b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3534b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3535b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3536b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3537b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3538b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3539b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3540b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 3541b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3542b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 3543b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 3544b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB80_1 3545b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 3546b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 3547b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3548b0a25468SMatt Arsenault; 3549b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret: 3550b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3551b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3552b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] 3553b0a25468SMatt Arsenault; GFX9-NEXT: 
s_mov_b64 s[4:5], 0 3554b0a25468SMatt Arsenault; GFX9-NEXT: .LBB80_1: ; %atomicrmw.start 3555b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 3556b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3557b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3558b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3559b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3560b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3561b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3562b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3563b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3564b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 3565b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3566b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 3567b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 3568b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB80_1 3569b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 3570b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 3571b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3572b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3573b0a25468SMatt Arsenault ret void 3574b0a25468SMatt Arsenault} 3575b0a25468SMatt Arsenault 3576b0a25468SMatt Arsenaultdefine void @flat_atomic_max_i64_noret_offset(ptr %out, i64 %in) { 3577b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset: 3578b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3579b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3580b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 3581b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 3582b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 3583b0a25468SMatt 
Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3584b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 3585b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 3586b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 3587b0a25468SMatt Arsenault; GFX7-NEXT: .LBB81_1: ; %atomicrmw.start 3588b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3589b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3590b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3591b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3592b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3593b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 3594b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3595b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3596b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 3597b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 3598b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3599b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 3600b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 3601b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB81_1 3602b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3603b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 3604b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3605b0a25468SMatt Arsenault; 3606b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset: 3607b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3608b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3609b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 3610b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 3611b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 3612b0a25468SMatt Arsenault; GFX8-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc 3613b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 3614b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 3615b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 3616b0a25468SMatt Arsenault; GFX8-NEXT: .LBB81_1: ; %atomicrmw.start 3617b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3618b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3619b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3620b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3621b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3622b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 3623b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3624b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3625b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 3626b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 3627b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3628b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 3629b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 3630b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB81_1 3631b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 3632b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 3633b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3634b0a25468SMatt Arsenault; 3635b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset: 3636b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3637b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3638b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 3639b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 3640b0a25468SMatt Arsenault; GFX9-NEXT: .LBB81_1: ; %atomicrmw.start 3641b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: 
Depth=1 3642b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3643b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3644b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3645b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3646b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 3647b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3648b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3649b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3650b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 3651b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3652b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 3653b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 3654b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB81_1 3655b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 3656b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 3657b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3658b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3659b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3660b0a25468SMatt Arsenault ret void 3661b0a25468SMatt Arsenault} 3662b0a25468SMatt Arsenault 3663b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret(ptr %ptr, i64 %in) { 3664b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret: 3665b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3666b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3667b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v5, vcc, 4, v0 3668b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 3669b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v4, v[0:1] 3670b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v5, v[5:6] 3671b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 
s[4:5], 0 3672b0a25468SMatt Arsenault; GFX7-NEXT: .LBB82_1: ; %atomicrmw.start 3673b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3674b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3675b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 3676b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 3677b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3678b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3679b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3680b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3681b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3682b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3683b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3684b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3685b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 3686b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB82_1 3687b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3688b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 3689b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v4 3690b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v5 3691b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3692b0a25468SMatt Arsenault; 3693b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret: 3694b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3695b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3696b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 3697b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 3698b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v4, v[0:1] 3699b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 3700b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 3701b0a25468SMatt Arsenault; GFX8-NEXT: .LBB82_1: ; 
%atomicrmw.start 3702b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3703b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3704b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 3705b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 3706b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3707b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3708b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3709b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3710b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3711b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3712b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3713b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3714b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 3715b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB82_1 3716b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 3717b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 3718b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 3719b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 3720b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3721b0a25468SMatt Arsenault; 3722b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret: 3723b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3724b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3725b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 3726b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 3727b0a25468SMatt Arsenault; GFX9-NEXT: .LBB82_1: ; %atomicrmw.start 3728b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 3729b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3730b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 3731b0a25468SMatt Arsenault; GFX9-NEXT: 
v_mov_b32_e32 v6, v4 3732b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3733b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3734b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3735b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 3736b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3737b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3738b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3739b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3740b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 3741b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB82_1 3742b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 3743b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 3744b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 3745b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 3746b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3747b0a25468SMatt Arsenault %result = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3748b0a25468SMatt Arsenault ret i64 %result 3749b0a25468SMatt Arsenault} 3750b0a25468SMatt Arsenault 3751b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret_offset(ptr %out, i64 %in) { 3752b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_offset: 3753b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3754b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3755b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 3756b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 3757b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 3758b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3759b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 3760b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 3761b0a25468SMatt 
Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 3762b0a25468SMatt Arsenault; GFX7-NEXT: .LBB83_1: ; %atomicrmw.start 3763b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3764b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3765b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 3766b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 3767b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] 3768b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 3769b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 3770b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 3771b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3772b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3773b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 3774b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3775b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 3776b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB83_1 3777b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3778b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 3779b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3780b0a25468SMatt Arsenault; 3781b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_offset: 3782b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3783b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3784b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 3785b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 3786b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 3787b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3788b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 3789b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 3790b0a25468SMatt Arsenault; GFX8-NEXT: 
s_mov_b64 s[4:5], 0 3791b0a25468SMatt Arsenault; GFX8-NEXT: .LBB83_1: ; %atomicrmw.start 3792b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3793b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3794b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 3795b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 3796b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] 3797b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 3798b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 3799b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 3800b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3801b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3802b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 3803b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3804b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 3805b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB83_1 3806b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 3807b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 3808b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 3809b0a25468SMatt Arsenault; 3810b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_offset: 3811b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3812b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3813b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 3814b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 3815b0a25468SMatt Arsenault; GFX9-NEXT: .LBB83_1: ; %atomicrmw.start 3816b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 3817b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3818b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 3819b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 
3820b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 3821b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 3822b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 3823b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 3824b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3825b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3826b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 3827b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3828b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 3829b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB83_1 3830b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 3831b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 3832b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 3833b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 3834b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 3835b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 3836b0a25468SMatt Arsenault %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 3837b0a25468SMatt Arsenault ret i64 %result 3838b0a25468SMatt Arsenault} 3839b0a25468SMatt Arsenault 3840b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 3841b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_scalar: 3842b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3843b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3844b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 3845b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 3846b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 3847b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 3848b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s34 3849b0a25468SMatt Arsenault; GFX7-NEXT: 
v_mov_b32_e32 v4, s35 3850b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[0:1] 3851b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[3:4] 3852*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s4 3853b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 3854*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 3855*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 3856*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s5 3857b0a25468SMatt Arsenault; GFX7-NEXT: .LBB84_1: ; %atomicrmw.start 3858b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3859b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3860b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 3861*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 3862*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 3863b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 3864b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3865b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 3866b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 3867b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 3868b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 3869b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 3870b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 3871b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB84_1 3872b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3873b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 3874b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3875b0a25468SMatt Arsenault; 3876b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_scalar: 3877b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3878b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3879b0a25468SMatt Arsenault; GFX8-NEXT: 
v_mov_b32_e32 v0, s4 3880b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 3881b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 3882b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 3883b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s34 3884b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s35 3885b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[0:1] 3886b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[3:4] 3887*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s4 3888b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 3889*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 3890*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 3891*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s5 3892b0a25468SMatt Arsenault; GFX8-NEXT: .LBB84_1: ; %atomicrmw.start 3893b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3894b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3895b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 3896*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 3897*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 3898b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 3899b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3900b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 3901b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 3902b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 3903b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 3904b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 3905b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 3906b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB84_1 3907b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 3908b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 3909b0a25468SMatt Arsenault; 
GFX8-NEXT: s_setpc_b64 s[30:31] 3910b0a25468SMatt Arsenault; 3911b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_scalar: 3912b0a25468SMatt Arsenault; GFX9: ; %bb.0: 3913b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3914b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 3915b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 3916b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 3917*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 3918b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 3919*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 3920*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 3921*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 3922b0a25468SMatt Arsenault; GFX9-NEXT: .LBB84_1: ; %atomicrmw.start 3923b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 3924b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3925b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 3926*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 3927*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 3928b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 3929b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3930b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 3931b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 3932b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 3933b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 3934b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 3935b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 3936b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB84_1 3937b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 3938b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 3939b0a25468SMatt Arsenault; 
GFX9-NEXT: s_setpc_b64 s[30:31] 3940b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 3941b0a25468SMatt Arsenault ret void 3942b0a25468SMatt Arsenault} 3943b0a25468SMatt Arsenault 3944b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 3945b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset_scalar: 3946b0a25468SMatt Arsenault; GFX7: ; %bb.0: 3947b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3948b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 3949b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 3950b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 3951b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 3952b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 3953b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 3954b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s34 3955b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v5, s35 3956b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[0:1] 3957b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[4:5] 3958*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 3959*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 3960*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 3961b0a25468SMatt Arsenault; GFX7-NEXT: .LBB85_1: ; %atomicrmw.start 3962b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 3963b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3964b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 3965*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 3966*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 3967b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 3968b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3969b0a25468SMatt Arsenault; 
GFX7-NEXT: buffer_wbinvl1_vol 3970b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 3971b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 3972*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 3973b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 3974*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 3975b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB85_1 3976b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 3977*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 3978b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 3979b0a25468SMatt Arsenault; 3980b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset_scalar: 3981b0a25468SMatt Arsenault; GFX8: ; %bb.0: 3982b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3983b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 3984b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 3985b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 3986b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 3987b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 3988b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 3989b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s34 3990b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v5, s35 3991b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 3992b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[4:5] 3993*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 3994*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 3995*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 3996b0a25468SMatt Arsenault; GFX8-NEXT: .LBB85_1: ; %atomicrmw.start 3997b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 3998b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3999b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 4000*eeac0ffaSNikita 
Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4001*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4002b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4003b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4004b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4005b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4006b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 4007*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4008b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 4009*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 4010b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB85_1 4011b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4012*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 4013b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 4014b0a25468SMatt Arsenault; 4015b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset_scalar: 4016b0a25468SMatt Arsenault; GFX9: ; %bb.0: 4017b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4018b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 4019b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 4020b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 4021*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 4022b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 4023*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 4024*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 4025*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 4026b0a25468SMatt Arsenault; GFX9-NEXT: .LBB85_1: ; %atomicrmw.start 4027b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4028b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4029b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3] 
4030*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4031*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4032b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 4033b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4034b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4035b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4036b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 4037b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4038b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 4039b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 4040b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB85_1 4041b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4042b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 4043b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4044b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 4045b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 4046b0a25468SMatt Arsenault ret void 4047b0a25468SMatt Arsenault} 4048b0a25468SMatt Arsenault 4049b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 4050b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_scalar: 4051b0a25468SMatt Arsenault; GFX7: ; %bb.0: 4052b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4053b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 4054b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 4055b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 4056b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 4057b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 4058b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 4059b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[0:1] 
4060b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[2:3] 4061*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v2, s4 4062b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 4063*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 4064*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 4065*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v3, s5 4066b0a25468SMatt Arsenault; GFX7-NEXT: .LBB86_1: ; %atomicrmw.start 4067b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4068b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4069*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 4070*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 4071*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4072*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4073*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4074*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 4075b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4076b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4077*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4078b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4079b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 4080b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB86_1 4081b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4082b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 4083b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4084b0a25468SMatt Arsenault; 4085b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_scalar: 4086b0a25468SMatt Arsenault; GFX8: ; %bb.0: 4087b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4088b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 4089b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 4090b0a25468SMatt 
Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 4091b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 4092b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 4093b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 4094b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[0:1] 4095b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[2:3] 4096*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s4 4097b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 4098*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 4099*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 4100*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, s5 4101b0a25468SMatt Arsenault; GFX8-NEXT: .LBB86_1: ; %atomicrmw.start 4102b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4103b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4104*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 4105*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 4106*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4107*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4108*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4109*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 4110b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4111b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4112*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4113b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4114b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 4115b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB86_1 4116b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4117b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 4118b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 4119b0a25468SMatt Arsenault; 4120b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_max_i64_ret_scalar: 4121b0a25468SMatt Arsenault; GFX9: ; %bb.0: 4122b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4123b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 4124b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 4125b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 4126*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 4127b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 4128*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 4129*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 4130*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 4131b0a25468SMatt Arsenault; GFX9-NEXT: .LBB86_1: ; %atomicrmw.start 4132b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4133b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4134*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 4135*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 4136*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4137*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4138*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4139*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 4140b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4141b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4142*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4143b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4144b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 4145b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB86_1 4146b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4147b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 4148b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4149b0a25468SMatt Arsenault %result = atomicrmw max ptr %ptr, i64 %in seq_cst, 
!noalias.addrspace !1 4150b0a25468SMatt Arsenault ret i64 %result 4151b0a25468SMatt Arsenault} 4152b0a25468SMatt Arsenault 4153b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 4154b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_offset_scalar: 4155b0a25468SMatt Arsenault; GFX7: ; %bb.0: 4156b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4157b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 4158b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 4159b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 4160b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 4161b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 4162b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 4163b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 4164b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 4165b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 4166b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[2:3] 4167*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 4168*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 4169*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 4170b0a25468SMatt Arsenault; GFX7-NEXT: .LBB87_1: ; %atomicrmw.start 4171b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4172b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4173*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 4174*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 4175*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4176*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4177*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4178*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 4179b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4180b0a25468SMatt 
Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4181*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4182*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4183*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 4184b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB87_1 4185b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4186*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 4187b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4188b0a25468SMatt Arsenault; 4189b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_offset_scalar: 4190b0a25468SMatt Arsenault; GFX8: ; %bb.0: 4191b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4192b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 4193b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 4194b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 4195b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 4196b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 4197b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 4198b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 4199b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 4200b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 4201b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[2:3] 4202*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 4203*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 4204*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 4205b0a25468SMatt Arsenault; GFX8-NEXT: .LBB87_1: ; %atomicrmw.start 4206b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4207b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4208*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 4209*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 4210*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4211*eeac0ffaSNikita 
Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4212*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4213*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 4214b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4215b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4216*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4217*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4218*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 4219b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB87_1 4220b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4221*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 4222b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 4223b0a25468SMatt Arsenault; 4224b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_offset_scalar: 4225b0a25468SMatt Arsenault; GFX9: ; %bb.0: 4226b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4227b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 4228b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 4229b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] offset:32 4230*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 4231b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 4232*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 4233*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 4234*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 4235b0a25468SMatt Arsenault; GFX9-NEXT: .LBB87_1: ; %atomicrmw.start 4236b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4237b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4238*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 4239*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 4240*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9] 4241*eeac0ffaSNikita 
Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4242*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4243*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc 4244b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4245b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4246*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4247b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 4248b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 4249b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB87_1 4250b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4251b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 4252b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4253b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 4254b0a25468SMatt Arsenault %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 4255b0a25468SMatt Arsenault ret i64 %result 4256b0a25468SMatt Arsenault} 4257b0a25468SMatt Arsenault 4258b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %index) { 4259b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_addr64_offset: 4260b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 42616548b635SShilei Tian; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd 42626548b635SShilei Tian; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4263b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 42646548b635SShilei Tian; GFX7-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 4265b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s4 4266b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s5 4267b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 4268b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 4269*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s1 4270*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s0 
4271*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 4272*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 4273*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s3 4274*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s2 4275b0a25468SMatt Arsenault; GFX7-NEXT: .LBB88_1: ; %atomicrmw.start 4276b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4277b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4278b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4279*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4280*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4281b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4282b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4283b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4284b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4285b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 4286*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4287b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 4288*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 4289b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB88_1 4290b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4291b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 4292b0a25468SMatt Arsenault; 4293b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_addr64_offset: 4294b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 42956548b635SShilei Tian; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 42966548b635SShilei Tian; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4297b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 42986548b635SShilei Tian; GFX8-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 4299b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s4 4300b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s5 4301b0a25468SMatt 
Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 4302b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 4303*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s1 4304*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s0 4305*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 4306*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 4307*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s3 4308*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s2 4309b0a25468SMatt Arsenault; GFX8-NEXT: .LBB88_1: ; %atomicrmw.start 4310b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4311b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4312b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4313*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4314*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4315b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4316b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4317b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4318b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4319b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 4320*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4321b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 4322*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 4323b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB88_1 4324b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4325b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 4326b0a25468SMatt Arsenault; 4327b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_addr64_offset: 4328b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 43296548b635SShilei Tian; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 43306548b635SShilei Tian; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4331b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt 
lgkmcnt(0) 43326548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 43336548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s0, s4 43346548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s1, s5 4335*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s1 4336*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s0 4337*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[4:5] offset:32 4338*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 4339*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s3 4340*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s2 4341b0a25468SMatt Arsenault; GFX9-NEXT: .LBB88_1: ; %atomicrmw.start 4342b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4343b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 43446548b635SShilei Tian; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4345*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4346*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4347b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 4348b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4349b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4350b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4351b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 4352*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4353b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 4354*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 4355b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB88_1 4356b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4357b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 4358b0a25468SMatt Arsenaultentry: 4359b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 4360b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 4361b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %gep, i64 
%in seq_cst, !noalias.addrspace !1 4362b0a25468SMatt Arsenault ret void 4363b0a25468SMatt Arsenault} 4364b0a25468SMatt Arsenault 4365b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) { 4366b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_ret_addr64_offset: 4367b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 43686548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4369b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4370b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 4371b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 4372b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 4373b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 4374b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 4375b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 4376b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 4377*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 4378*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 4379*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 4380*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 4381b0a25468SMatt Arsenault; GFX7-NEXT: .LBB89_1: ; %atomicrmw.start 4382b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4383b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4384*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 4385*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 4386*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9] 4387*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4388*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4389*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 4390b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4391b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 
4392*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4393*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4394*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 4395b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB89_1 4396b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4397*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 4398*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, s2 4399*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 4400*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4401b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 4402b0a25468SMatt Arsenault; 4403b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_ret_addr64_offset: 4404b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 44056548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4406b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4407b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 4408b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 4409b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 4410b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 4411b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 4412b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 4413b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 4414*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 4415*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 4416*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 4417*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s4 4418b0a25468SMatt Arsenault; GFX8-NEXT: .LBB89_1: ; %atomicrmw.start 4419b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4420b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4421*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 4422*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v2 4423*eeac0ffaSNikita Popov; 
GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9] 4424*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4425*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4426*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 4427b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4428b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4429*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4430*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4431*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 4432b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB89_1 4433b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4434*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 4435*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 4436*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 4437*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4438b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 4439b0a25468SMatt Arsenault; 4440b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_ret_addr64_offset: 4441b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 44426548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4443b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 44446548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 44456548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 44466548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 4447b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 4448b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 4449*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 4450*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 4451*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 4452*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 4453b0a25468SMatt Arsenault; GFX9-NEXT: .LBB89_1: ; %atomicrmw.start 
4454b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4455b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4456*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 4457*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 4458*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9] 4459*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4460*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4461*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc 4462b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4463b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4464*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4465*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4466*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 4467b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB89_1 4468b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4469*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 4470*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 4471*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 4472*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4473b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 4474b0a25468SMatt Arsenaultentry: 4475b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 4476b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 4477b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 4478b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 4479b0a25468SMatt Arsenault ret void 4480b0a25468SMatt Arsenault} 4481b0a25468SMatt Arsenault 4482b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index) { 4483b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_addr64: 4484b0a25468SMatt 
Arsenault; GFX7: ; %bb.0: ; %entry 44856548b635SShilei Tian; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd 44866548b635SShilei Tian; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4487b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 44886548b635SShilei Tian; GFX7-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 4489b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s4 4490b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s5 4491*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s1 4492*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s0 4493*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 4494*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 4495*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s3 4496*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s2 4497b0a25468SMatt Arsenault; GFX7-NEXT: .LBB90_1: ; %atomicrmw.start 4498b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4499b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4500b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4501*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4502*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4503b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4504b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4505b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4506b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4507b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 4508*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4509b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 4510*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 4511b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB90_1 4512b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4513b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 4514b0a25468SMatt 
Arsenault; 4515b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_addr64: 4516b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 45176548b635SShilei Tian; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 45186548b635SShilei Tian; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4519b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 45206548b635SShilei Tian; GFX8-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 4521b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s4 4522b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s5 4523*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s1 4524*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s0 4525*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 4526*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 4527*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s3 4528*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s2 4529b0a25468SMatt Arsenault; GFX8-NEXT: .LBB90_1: ; %atomicrmw.start 4530b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4531b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4532b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4533*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4534*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4535b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4536b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4537b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4538b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4539b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 4540*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4541b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 4542*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 4543b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB90_1 4544b0a25468SMatt Arsenault; GFX8-NEXT: ; 
%bb.2: ; %atomicrmw.end 4545b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 4546b0a25468SMatt Arsenault; 4547b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_addr64: 4548b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 45496548b635SShilei Tian; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 45506548b635SShilei Tian; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4551b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 45526548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 45536548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s0, s4 45546548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s1, s5 4555*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s1 4556*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s0 4557*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 4558*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 4559*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s3 4560*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s2 4561b0a25468SMatt Arsenault; GFX9-NEXT: .LBB90_1: ; %atomicrmw.start 4562b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4563b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 45646548b635SShilei Tian; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] 4565*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 4566*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 4567b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 4568b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4569b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4570b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 4571b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 4572*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4573b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 4574*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 4575b0a25468SMatt Arsenault; 
GFX9-NEXT: s_cbranch_execnz .LBB90_1 4576b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4577b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 4578b0a25468SMatt Arsenaultentry: 4579b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 4580b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 4581b0a25468SMatt Arsenault ret void 4582b0a25468SMatt Arsenault} 4583b0a25468SMatt Arsenault 4584b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) { 4585b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_ret_addr64: 4586b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 45876548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4588b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4589b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 4590b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 4591b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 4592b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 4593b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 4594*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 4595*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 4596*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 4597*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 4598b0a25468SMatt Arsenault; GFX7-NEXT: .LBB91_1: ; %atomicrmw.start 4599b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4600b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4601*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 4602*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 4603*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9] 4604*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4605*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4606*eeac0ffaSNikita Popov; GFX7-NEXT: 
flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 4607b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4608b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4609*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4610*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4611*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 4612b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB91_1 4613b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4614*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 4615*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, s2 4616*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 4617*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4618b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 4619b0a25468SMatt Arsenault; 4620b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_ret_addr64: 4621b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 46226548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4623b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4624b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 4625b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 4626b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 4627b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 4628b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 4629*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 4630*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 4631*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 4632*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s4 4633b0a25468SMatt Arsenault; GFX8-NEXT: .LBB91_1: ; %atomicrmw.start 4634b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4635b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4636*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 4637*eeac0ffaSNikita Popov; GFX8-NEXT: 
v_mov_b32_e32 v8, v2 4638*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9] 4639*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4640*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4641*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 4642b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4643b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4644*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4645*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4646*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 4647b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB91_1 4648b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4649*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 4650*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 4651*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 4652*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4653b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 4654b0a25468SMatt Arsenault; 4655b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_ret_addr64: 4656b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 46576548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4658b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 46596548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 46606548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 46616548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 4662b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 4663b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 4664*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 4665*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 4666*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 4667*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 4668b0a25468SMatt Arsenault; 
GFX9-NEXT: .LBB91_1: ; %atomicrmw.start 4669b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4670b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4671*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 4672*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 4673*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9] 4674*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 4675*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 4676*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 4677b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4678b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4679*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 4680*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 4681*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 4682b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB91_1 4683b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4684*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 4685*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 4686*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 4687*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4688b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 4689b0a25468SMatt Arsenaultentry: 4690b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 4691b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 4692b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 4693b0a25468SMatt Arsenault ret void 4694b0a25468SMatt Arsenault} 4695b0a25468SMatt Arsenault 4696b0a25468SMatt Arsenaultdefine void @flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 4697b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory: 4698b0a25468SMatt 
Arsenault; GFX7: ; %bb.0: 4699b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4700b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 4701b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 4702b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 4703b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4704b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 4705b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 4706b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 4707b0a25468SMatt Arsenault; GFX7-NEXT: .LBB92_1: ; %atomicrmw.start 4708b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4709b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4710b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 4711b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4712b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4713b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 4714b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4715b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4716b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 4717b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 4718b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4719b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 4720b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 4721b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB92_1 4722b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4723b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 4724b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4725b0a25468SMatt Arsenault; 4726b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory: 4727b0a25468SMatt 
Arsenault; GFX8: ; %bb.0: 4728b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4729b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 4730b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 4731b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 4732b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4733b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 4734b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 4735b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 4736b0a25468SMatt Arsenault; GFX8-NEXT: .LBB92_1: ; %atomicrmw.start 4737b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4738b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4739b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 4740b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4741b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4742b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 4743b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4744b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4745b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 4746b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 4747b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4748b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 4749b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 4750b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB92_1 4751b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4752b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 4753b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 4754b0a25468SMatt Arsenault; 4755b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory: 4756b0a25468SMatt 
Arsenault; GFX9: ; %bb.0: 4757b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4758b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 4759b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 4760b0a25468SMatt Arsenault; GFX9-NEXT: .LBB92_1: ; %atomicrmw.start 4761b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4762b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4763b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 4764b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4765b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4766b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 4767b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4768b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4769b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 4770b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 4771b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4772b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 4773b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 4774b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB92_1 4775b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4776b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 4777b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4778b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 4779b0a25468SMatt Arsenault %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 4780b0a25468SMatt Arsenault ret void 4781b0a25468SMatt Arsenault} 4782b0a25468SMatt Arsenault 4783b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 4784b0a25468SMatt Arsenault; GFX7-LABEL: 
flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory: 4785b0a25468SMatt Arsenault; GFX7: ; %bb.0: 4786b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4787b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 4788b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4789b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 4790b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4791b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 4792b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 4793b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 4794b0a25468SMatt Arsenault; GFX7-NEXT: .LBB93_1: ; %atomicrmw.start 4795b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4796b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4797b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 4798b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 4799b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] 4800b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 4801b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 4802b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 4803b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4804b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4805b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4806b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4807b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 4808b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB93_1 4809b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4810b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 4811b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4812b0a25468SMatt Arsenault; 4813b0a25468SMatt Arsenault; GFX8-LABEL: 
flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory: 4814b0a25468SMatt Arsenault; GFX8: ; %bb.0: 4815b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4816b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 4817b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4818b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 4819b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4820b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 4821b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 4822b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 4823b0a25468SMatt Arsenault; GFX8-NEXT: .LBB93_1: ; %atomicrmw.start 4824b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4825b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4826b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 4827b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 4828b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] 4829b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 4830b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 4831b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 4832b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4833b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4834b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 4835b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4836b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 4837b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB93_1 4838b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4839b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 4840b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 4841b0a25468SMatt Arsenault; 4842b0a25468SMatt Arsenault; GFX9-LABEL: 
flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory: 4843b0a25468SMatt Arsenault; GFX9: ; %bb.0: 4844b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4845b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 4846b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 4847b0a25468SMatt Arsenault; GFX9-NEXT: .LBB93_1: ; %atomicrmw.start 4848b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4849b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4850b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 4851b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 4852b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] 4853b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4854b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4855b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 4856b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4857b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4858b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 4859b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4860b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 4861b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB93_1 4862b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4863b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 4864b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 4865b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 4866b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4867b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 4868b0a25468SMatt Arsenault %result = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 4869b0a25468SMatt Arsenault ret i64 %result 4870b0a25468SMatt Arsenault} 
4871b0a25468SMatt Arsenault 4872b0a25468SMatt Arsenault; --------------------------------------------------------------------- 4873b0a25468SMatt Arsenault; atomicrmw umax 4874b0a25468SMatt Arsenault; --------------------------------------------------------------------- 4875b0a25468SMatt Arsenault 4876b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret(ptr %ptr, i64 %in) { 4877b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret: 4878b0a25468SMatt Arsenault; GFX7: ; %bb.0: 4879b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4880b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 4, v0 4881b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4882b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[0:1] 4883b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[4:5] 4884b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 4885b0a25468SMatt Arsenault; GFX7-NEXT: .LBB94_1: ; %atomicrmw.start 4886b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4887b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4888b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 4889b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4890b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4891b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 4892b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4893b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4894b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 4895b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 4896b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4897b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 4898b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 4899b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB94_1 4900b0a25468SMatt 
Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4901b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 4902b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4903b0a25468SMatt Arsenault; 4904b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret: 4905b0a25468SMatt Arsenault; GFX8: ; %bb.0: 4906b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4907b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 4, v0 4908b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4909b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[0:1] 4910b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[4:5] 4911b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 4912b0a25468SMatt Arsenault; GFX8-NEXT: .LBB94_1: ; %atomicrmw.start 4913b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 4914b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4915b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 4916b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4917b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4918b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 4919b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4920b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 4921b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 4922b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 4923b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4924b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 4925b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 4926b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB94_1 4927b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 4928b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 4929b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 
4930b0a25468SMatt Arsenault; 4931b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret: 4932b0a25468SMatt Arsenault; GFX9: ; %bb.0: 4933b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4934b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] 4935b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 4936b0a25468SMatt Arsenault; GFX9-NEXT: .LBB94_1: ; %atomicrmw.start 4937b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 4938b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4939b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 4940b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4941b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4942b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 4943b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4944b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 4945b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 4946b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 4947b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4948b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 4949b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 4950b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB94_1 4951b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 4952b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 4953b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 4954b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 4955b0a25468SMatt Arsenault ret void 4956b0a25468SMatt Arsenault} 4957b0a25468SMatt Arsenault 4958b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret_offset(ptr %out, i64 %in) { 4959b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset: 4960b0a25468SMatt 
Arsenault; GFX7: ; %bb.0: 4961b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4962b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 4963b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 4964b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 4965b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4966b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 4967b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 4968b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 4969b0a25468SMatt Arsenault; GFX7-NEXT: .LBB95_1: ; %atomicrmw.start 4970b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 4971b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4972b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 4973b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 4974b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 4975b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 4976b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4977b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 4978b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 4979b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 4980b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 4981b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 4982b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 4983b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB95_1 4984b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 4985b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 4986b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 4987b0a25468SMatt Arsenault; 4988b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset: 4989b0a25468SMatt Arsenault; GFX8: ; %bb.0: 
4990b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4991b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 4992b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 4993b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 4994b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4995b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 4996b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 4997b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 4998b0a25468SMatt Arsenault; GFX8-NEXT: .LBB95_1: ; %atomicrmw.start 4999b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5000b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5001b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5002b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5003b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5004b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 5005b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5006b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5007b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 5008b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 5009b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5010b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 5011b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 5012b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB95_1 5013b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5014b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 5015b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5016b0a25468SMatt Arsenault; 5017b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset: 5018b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5019b0a25468SMatt 
Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5020b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 5021b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 5022b0a25468SMatt Arsenault; GFX9-NEXT: .LBB95_1: ; %atomicrmw.start 5023b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5024b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5025b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5026b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5027b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5028b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 5029b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5030b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5031b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 5032b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 5033b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5034b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 5035b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 5036b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB95_1 5037b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5038b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 5039b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5040b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 5041b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5042b0a25468SMatt Arsenault ret void 5043b0a25468SMatt Arsenault} 5044b0a25468SMatt Arsenault 5045b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret(ptr %ptr, i64 %in) { 5046b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret: 5047b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5048b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5049b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v5, vcc, 4, v0 5050b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 5051b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v4, v[0:1] 5052b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v5, v[5:6] 5053b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 5054b0a25468SMatt Arsenault; GFX7-NEXT: .LBB96_1: ; %atomicrmw.start 5055b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5056b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5057b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 5058b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 5059b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5060b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5061b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5062b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 5063b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5064b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5065b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 5066b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5067b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 5068b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB96_1 5069b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5070b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 5071b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v4 5072b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v5 5073b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 5074b0a25468SMatt Arsenault; 5075b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret: 5076b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5077b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5078b0a25468SMatt Arsenault; GFX8-NEXT: 
v_add_u32_e32 v5, vcc, 4, v0 5079b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 5080b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v4, v[0:1] 5081b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 5082b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 5083b0a25468SMatt Arsenault; GFX8-NEXT: .LBB96_1: ; %atomicrmw.start 5084b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5085b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5086b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 5087b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 5088b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5089b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5090b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5091b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 5092b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5093b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5094b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 5095b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5096b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 5097b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB96_1 5098b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5099b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 5100b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 5101b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 5102b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5103b0a25468SMatt Arsenault; 5104b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret: 5105b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5106b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5107b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 5108b0a25468SMatt 
Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 5109b0a25468SMatt Arsenault; GFX9-NEXT: .LBB96_1: ; %atomicrmw.start 5110b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5111b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5112b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 5113b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 5114b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5115b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5116b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5117b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 5118b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5119b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5120b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 5121b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5122b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 5123b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB96_1 5124b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5125b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 5126b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 5127b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 5128b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5129b0a25468SMatt Arsenault %result = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 5130b0a25468SMatt Arsenault ret i64 %result 5131b0a25468SMatt Arsenault} 5132b0a25468SMatt Arsenault 5133b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret_offset(ptr %out, i64 %in) { 5134b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset: 5135b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5136b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5137b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 
5138b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 5139b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 5140b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5141b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 5142b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 5143b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 5144b0a25468SMatt Arsenault; GFX7-NEXT: .LBB97_1: ; %atomicrmw.start 5145b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5146b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5147b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 5148b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 5149b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] 5150b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 5151b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 5152b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 5153b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5154b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5155b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5156b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5157b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 5158b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB97_1 5159b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5160b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 5161b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 5162b0a25468SMatt Arsenault; 5163b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset: 5164b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5165b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5166b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 5167b0a25468SMatt Arsenault; 
GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 5168b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 5169b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5170b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 5171b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 5172b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 5173b0a25468SMatt Arsenault; GFX8-NEXT: .LBB97_1: ; %atomicrmw.start 5174b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5175b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5176b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 5177b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 5178b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] 5179b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 5180b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 5181b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 5182b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5183b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5184b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5185b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5186b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 5187b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB97_1 5188b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5189b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 5190b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5191b0a25468SMatt Arsenault; 5192b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset: 5193b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5194b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5195b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 5196b0a25468SMatt Arsenault; GFX9-NEXT: 
s_mov_b64 s[4:5], 0 5197b0a25468SMatt Arsenault; GFX9-NEXT: .LBB97_1: ; %atomicrmw.start 5198b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5199b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5200b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 5201b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 5202b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5203b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5204b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5205b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 5206b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5207b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5208b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 5209b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5210b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 5211b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB97_1 5212b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5213b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 5214b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 5215b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 5216b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5217b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 5218b0a25468SMatt Arsenault %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5219b0a25468SMatt Arsenault ret i64 %result 5220b0a25468SMatt Arsenault} 5221b0a25468SMatt Arsenault 5222b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 5223b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_scalar: 5224b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5225b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 5226b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 5227b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 5228b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 5229b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 5230b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s34 5231b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s35 5232b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[0:1] 5233b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[3:4] 5234*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s4 5235b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 5236*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 5237*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 5238*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s5 5239b0a25468SMatt Arsenault; GFX7-NEXT: .LBB98_1: ; %atomicrmw.start 5240b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5241b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5242b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5243*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5244*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5245b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5246b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5247b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5248b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5249b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 5250b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5251b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 5252b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 5253b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB98_1 5254b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5255b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 
exec, exec, s[34:35] 5256b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 5257b0a25468SMatt Arsenault; 5258b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_scalar: 5259b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5260b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5261b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 5262b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 5263b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 5264b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 5265b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s34 5266b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s35 5267b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[0:1] 5268b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[3:4] 5269*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s4 5270b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 5271*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 5272*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 5273*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s5 5274b0a25468SMatt Arsenault; GFX8-NEXT: .LBB98_1: ; %atomicrmw.start 5275b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5276b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5277b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5278*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5279*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5280b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5281b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5282b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5283b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5284b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 5285b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 
5286b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 5287b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 5288b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB98_1 5289b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5290b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 5291b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5292b0a25468SMatt Arsenault; 5293b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_scalar: 5294b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5295b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5296b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 5297b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 5298b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5299*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 5300b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 5301*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 5302*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 5303*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 5304b0a25468SMatt Arsenault; GFX9-NEXT: .LBB98_1: ; %atomicrmw.start 5305b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5306b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5307b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5308*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5309*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5310b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5311b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5312b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5313b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5314b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 5315b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 
5316b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 5317b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 5318b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB98_1 5319b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5320b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 5321b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5322b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 5323b0a25468SMatt Arsenault ret void 5324b0a25468SMatt Arsenault} 5325b0a25468SMatt Arsenault 5326b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 5327b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset_scalar: 5328b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5329b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5330b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 5331b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 5332b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 5333b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 5334b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 5335b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 5336b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s34 5337b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v5, s35 5338b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[0:1] 5339b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[4:5] 5340*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 5341*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 5342*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 5343b0a25468SMatt Arsenault; GFX7-NEXT: .LBB99_1: ; %atomicrmw.start 5344b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5345b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5346b0a25468SMatt Arsenault; GFX7-NEXT: 
v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5347*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5348*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5349b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5350b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5351b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5352b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5353b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 5354*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5355b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 5356*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 5357b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB99_1 5358b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5359*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 5360b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 5361b0a25468SMatt Arsenault; 5362b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset_scalar: 5363b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5364b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5365b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 5366b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 5367b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 5368b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 5369b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 5370b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 5371b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s34 5372b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v5, s35 5373b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 5374b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[4:5] 5375*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 5376*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 
5377*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 5378b0a25468SMatt Arsenault; GFX8-NEXT: .LBB99_1: ; %atomicrmw.start 5379b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5380b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5381b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5382*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5383*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5384b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5385b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5386b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5387b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5388b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 5389*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5390b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 5391*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 5392b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB99_1 5393b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5394*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 5395b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5396b0a25468SMatt Arsenault; 5397b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset_scalar: 5398b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5399b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5400b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 5401b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 5402b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 5403*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 5404b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 5405*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 5406*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 
v7, s6 5407*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 5408b0a25468SMatt Arsenault; GFX9-NEXT: .LBB99_1: ; %atomicrmw.start 5409b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5410b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5411b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 5412*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5413*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5414b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 5415b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5416b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5417b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5418b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 5419b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5420b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 5421b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 5422b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB99_1 5423b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5424b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 5425b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5426b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 5427b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5428b0a25468SMatt Arsenault ret void 5429b0a25468SMatt Arsenault} 5430b0a25468SMatt Arsenault 5431b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 5432b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_scalar: 5433b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5434b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5435b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 5436b0a25468SMatt 
Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 5437b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 5438b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 5439b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 5440b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 5441b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[0:1] 5442b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[2:3] 5443*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v2, s4 5444b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 5445*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 5446*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 5447*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v3, s5 5448b0a25468SMatt Arsenault; GFX7-NEXT: .LBB100_1: ; %atomicrmw.start 5449b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5450b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5451*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 5452*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 5453*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5454*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5455*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5456*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 5457b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5458b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5459*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5460b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5461b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 5462b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB100_1 5463b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5464b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 5465b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 
5466b0a25468SMatt Arsenault; 5467b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_scalar: 5468b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5469b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5470b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 5471b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 5472b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 5473b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 5474b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 5475b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 5476b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[0:1] 5477b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[2:3] 5478*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s4 5479b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 5480*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 5481*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 5482*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, s5 5483b0a25468SMatt Arsenault; GFX8-NEXT: .LBB100_1: ; %atomicrmw.start 5484b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5485b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5486*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 5487*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 5488*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5489*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5490*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5491*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 5492b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5493b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5494*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5495b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5496b0a25468SMatt Arsenault; GFX8-NEXT: 
s_andn2_b64 exec, exec, s[34:35] 5497b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB100_1 5498b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5499b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 5500b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5501b0a25468SMatt Arsenault; 5502b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_scalar: 5503b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5504b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5505b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 5506b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 5507b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 5508*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 5509b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 5510*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 5511*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 5512*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 5513b0a25468SMatt Arsenault; GFX9-NEXT: .LBB100_1: ; %atomicrmw.start 5514b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5515b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5516*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 5517*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 5518*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5519*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5520*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5521*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 5522b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5523b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5524*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5525b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5526b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 
exec, exec, s[34:35] 5527b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB100_1 5528b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5529b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 5530b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5531b0a25468SMatt Arsenault %result = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 5532b0a25468SMatt Arsenault ret i64 %result 5533b0a25468SMatt Arsenault} 5534b0a25468SMatt Arsenault 5535b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 5536b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset_scalar: 5537b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5538b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5539b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 5540b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 5541b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 5542b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 5543b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 5544b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 5545b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 5546b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 5547b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 5548b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[2:3] 5549*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 5550*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 5551*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 5552b0a25468SMatt Arsenault; GFX7-NEXT: .LBB101_1: ; %atomicrmw.start 5553b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5554b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5555*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 5556*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 5557*eeac0ffaSNikita Popov; 
GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5558*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5559*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5560*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 5561b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5562b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5563*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5564*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5565*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 5566b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB101_1 5567b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5568*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 5569b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 5570b0a25468SMatt Arsenault; 5571b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset_scalar: 5572b0a25468SMatt Arsenault; GFX8: ; %bb.0: 5573b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5574b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 5575b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 5576b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 5577b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 5578b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 5579b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 5580b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 5581b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 5582b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 5583b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[2:3] 5584*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 5585*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 5586*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 5587b0a25468SMatt Arsenault; GFX8-NEXT: .LBB101_1: ; 
%atomicrmw.start 5588b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5589b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5590*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 5591*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 5592*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5593*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5594*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5595*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 5596b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5597b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5598*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5599*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5600*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 5601b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB101_1 5602b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5603*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 5604b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 5605b0a25468SMatt Arsenault; 5606b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset_scalar: 5607b0a25468SMatt Arsenault; GFX9: ; %bb.0: 5608b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5609b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 5610b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 5611b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] offset:32 5612*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 5613b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 5614*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 5615*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 5616*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 5617b0a25468SMatt Arsenault; GFX9-NEXT: .LBB101_1: ; 
%atomicrmw.start 5618b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5619b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5620*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 5621*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 5622*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 5623*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5624*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5625*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc 5626b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5627b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5628*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 5629b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 5630b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 5631b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB101_1 5632b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5633b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 5634b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 5635b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 5636b0a25468SMatt Arsenault %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5637b0a25468SMatt Arsenault ret i64 %result 5638b0a25468SMatt Arsenault} 5639b0a25468SMatt Arsenault 5640b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64 %index) { 5641b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_addr64_offset: 5642b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 56436548b635SShilei Tian; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd 56446548b635SShilei Tian; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5645b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 56466548b635SShilei Tian; GFX7-NEXT: s_lshl_b64 s[4:5], 
s[6:7], 3 5647b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s4 5648b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s5 5649b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 5650b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 5651*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s1 5652*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s0 5653*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 5654*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 5655*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s3 5656*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s2 5657b0a25468SMatt Arsenault; GFX7-NEXT: .LBB102_1: ; %atomicrmw.start 5658b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5659b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5660b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3] 5661*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5662*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5663b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5664b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5665b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5666b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5667b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 5668*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5669b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 5670*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 5671b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB102_1 5672b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5673b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 5674b0a25468SMatt Arsenault; 5675b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_addr64_offset: 5676b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 56776548b635SShilei Tian; GFX8-NEXT: 
s_load_dwordx2 s[6:7], s[4:5], 0x34 56786548b635SShilei Tian; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5679b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 56806548b635SShilei Tian; GFX8-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 5681b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s4 5682b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s5 5683b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 5684b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 5685*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s1 5686*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s0 5687*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 5688*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 5689*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s3 5690*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s2 5691b0a25468SMatt Arsenault; GFX8-NEXT: .LBB102_1: ; %atomicrmw.start 5692b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5693b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5694b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3] 5695*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5696*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5697b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 5698b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5699b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5700b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5701b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 5702*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5703b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 5704*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 5705b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB102_1 5706b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5707b0a25468SMatt 
Arsenault; GFX8-NEXT: s_endpgm 5708b0a25468SMatt Arsenault; 5709b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_addr64_offset: 5710b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 57116548b635SShilei Tian; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 57126548b635SShilei Tian; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5713b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 57146548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 57156548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s0, s4 57166548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s1, s5 5717*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s1 5718*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s0 5719*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[4:5] offset:32 5720*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 5721*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s3 5722*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s2 5723b0a25468SMatt Arsenault; GFX9-NEXT: .LBB102_1: ; %atomicrmw.start 5724b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5725b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 57266548b635SShilei Tian; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3] 5727*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 5728*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 5729b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 5730b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5731b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5732b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 5733b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 5734*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5735b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 5736*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 5737b0a25468SMatt Arsenault; GFX9-NEXT: 
s_cbranch_execnz .LBB102_1 5738b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5739b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 5740b0a25468SMatt Arsenaultentry: 5741b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 5742b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 5743b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5744b0a25468SMatt Arsenault ret void 5745b0a25468SMatt Arsenault} 5746b0a25468SMatt Arsenault 5747b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) { 5748b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_ret_addr64_offset: 5749b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 57506548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5751b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5752b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 5753b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 5754b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 5755b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 5756b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 5757b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 5758b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 5759*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5760*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 5761*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 5762*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 5763b0a25468SMatt Arsenault; GFX7-NEXT: .LBB103_1: ; %atomicrmw.start 5764b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5765b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5766*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 5767*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 5768*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, 
s[4:5], v[8:9] 5769*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5770*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5771*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 5772b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5773b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5774*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 5775*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5776*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 5777b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB103_1 5778b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5779*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 5780*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, s2 5781*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 5782*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5783b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 5784b0a25468SMatt Arsenault; 5785b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_ret_addr64_offset: 5786b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 57876548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5788b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5789b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 5790b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 5791b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 5792b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 5793b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 5794b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 5795b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 5796*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5797*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 5798*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 5799*eeac0ffaSNikita Popov; GFX8-NEXT: 
v_mov_b32_e32 v5, s4 5800b0a25468SMatt Arsenault; GFX8-NEXT: .LBB103_1: ; %atomicrmw.start 5801b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5802b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5803*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 5804*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v2 5805*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9] 5806*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5807*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5808*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 5809b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5810b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5811*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 5812*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5813*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 5814b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB103_1 5815b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5816*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 5817*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 5818*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 5819*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5820b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 5821b0a25468SMatt Arsenault; 5822b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_ret_addr64_offset: 5823b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 58246548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5825b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 58266548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 58276548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 58286548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 5829b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 
5830b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 5831*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 5832*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 5833*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 5834*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 5835b0a25468SMatt Arsenault; GFX9-NEXT: .LBB103_1: ; %atomicrmw.start 5836b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5837b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5838*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 5839*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 5840*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9] 5841*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5842*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5843*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc 5844b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5845b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5846*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 5847*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5848*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 5849b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB103_1 5850b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5851*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 5852*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 5853*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 5854*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5855b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 5856b0a25468SMatt Arsenaultentry: 5857b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 5858b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 5859b0a25468SMatt Arsenault %tmp0 = atomicrmw umax 
ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 5860b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 5861b0a25468SMatt Arsenault ret void 5862b0a25468SMatt Arsenault} 5863b0a25468SMatt Arsenault 5864b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) { 5865b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_ret_addr64: 5866b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 58676548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5868b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5869b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 5870b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 5871b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 5872b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 5873b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 5874*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5875*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 5876*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 5877*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 5878b0a25468SMatt Arsenault; GFX7-NEXT: .LBB104_1: ; %atomicrmw.start 5879b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5880b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5881*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 5882*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 5883*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9] 5884*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5885*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5886*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 5887b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5888b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5889*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 
5890*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5891*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 5892b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB104_1 5893b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 5894*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 5895*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, s2 5896*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 5897*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5898b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 5899b0a25468SMatt Arsenault; 5900b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_ret_addr64: 5901b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 59026548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5903b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5904b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 5905b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 5906b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 5907b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 5908b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 5909*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5910*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 5911*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 5912*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s4 5913b0a25468SMatt Arsenault; GFX8-NEXT: .LBB104_1: ; %atomicrmw.start 5914b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 5915b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5916*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 5917*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v2 5918*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9] 5919*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5920*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 
5921*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 5922b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5923b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 5924*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 5925*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5926*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 5927b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB104_1 5928b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 5929*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 5930*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 5931*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 5932*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5933b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 5934b0a25468SMatt Arsenault; 5935b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_ret_addr64: 5936b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 59376548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5938b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 59396548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 59406548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 59416548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 5942b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 5943b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 5944*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 5945*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 5946*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 5947*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 5948b0a25468SMatt Arsenault; GFX9-NEXT: .LBB104_1: ; %atomicrmw.start 5949b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 5950b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5951*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 
5952*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 5953*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9] 5954*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 5955*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 5956*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 5957b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5958b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 5959*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 5960*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 5961*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 5962b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB104_1 5963b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 5964*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 5965*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 5966*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 5967*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5968b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 5969b0a25468SMatt Arsenaultentry: 5970b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 5971b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 5972b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 5973b0a25468SMatt Arsenault ret void 5974b0a25468SMatt Arsenault} 5975b0a25468SMatt Arsenault 5976b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 5977b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory: 5978b0a25468SMatt Arsenault; GFX7: ; %bb.0: 5979b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5980b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 5981b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, 
v1, vcc 5982b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 5983b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5984b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 5985b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 5986b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 5987b0a25468SMatt Arsenault; GFX7-NEXT: .LBB105_1: ; %atomicrmw.start 5988b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 5989b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5990b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 5991b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 5992b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 5993b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 5994b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5995b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 5996b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 5997b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 5998b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 5999b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 6000b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6001b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB105_1 6002b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6003b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6004b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6005b0a25468SMatt Arsenault; 6006b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory: 6007b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6008b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6009b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 6010b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, 
v1, vcc 6011b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 6012b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6013b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 6014b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 6015b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6016b0a25468SMatt Arsenault; GFX8-NEXT: .LBB105_1: ; %atomicrmw.start 6017b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6018b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6019b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 6020b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6021b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6022b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 6023b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6024b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6025b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 6026b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 6027b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6028b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 6029b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6030b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB105_1 6031b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6032b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6033b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6034b0a25468SMatt Arsenault; 6035b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory: 6036b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6037b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6038b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 6039b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 
s[4:5], 0 6040b0a25468SMatt Arsenault; GFX9-NEXT: .LBB105_1: ; %atomicrmw.start 6041b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6042b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6043b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 6044b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6045b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6046b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 6047b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6048b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6049b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6050b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6051b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6052b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6053b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6054b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB105_1 6055b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6056b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6057b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6058b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6059b0a25468SMatt Arsenault %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 6060b0a25468SMatt Arsenault ret void 6061b0a25468SMatt Arsenault} 6062b0a25468SMatt Arsenault 6063b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 6064b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory: 6065b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6066b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6067b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 
6068b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6069b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 6070b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6071b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 6072b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 6073b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6074b0a25468SMatt Arsenault; GFX7-NEXT: .LBB106_1: ; %atomicrmw.start 6075b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6076b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6077b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 6078b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 6079b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] 6080b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 6081b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 6082b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 6083b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6084b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6085b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6086b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6087b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6088b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB106_1 6089b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6090b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6091b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6092b0a25468SMatt Arsenault; 6093b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory: 6094b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6095b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6096b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 
6097b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6098b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 6099b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6100b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 6101b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 6102b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6103b0a25468SMatt Arsenault; GFX8-NEXT: .LBB106_1: ; %atomicrmw.start 6104b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6105b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6106b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 6107b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 6108b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] 6109b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 6110b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 6111b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 6112b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6113b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6114b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6115b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6116b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6117b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB106_1 6118b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6119b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6120b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6121b0a25468SMatt Arsenault; 6122b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory: 6123b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6124b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6125b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 
offset:32 6126b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 6127b0a25468SMatt Arsenault; GFX9-NEXT: .LBB106_1: ; %atomicrmw.start 6128b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6129b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6130b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6131b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6132b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] 6133b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6134b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6135b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 6136b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6137b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6138b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6139b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6140b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6141b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB106_1 6142b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6143b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6144b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 6145b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 6146b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6147b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6148b0a25468SMatt Arsenault %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 6149b0a25468SMatt Arsenault ret i64 %result 6150b0a25468SMatt Arsenault} 6151b0a25468SMatt Arsenault 6152b0a25468SMatt Arsenault; --------------------------------------------------------------------- 6153b0a25468SMatt Arsenault; atomicrmw umin 6154b0a25468SMatt Arsenault; 
--------------------------------------------------------------------- 6155b0a25468SMatt Arsenault 6156b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret(ptr %ptr, i64 %in) { 6157b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret: 6158b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6159b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6160b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 4, v0 6161b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6162b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[0:1] 6163b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[4:5] 6164b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6165b0a25468SMatt Arsenault; GFX7-NEXT: .LBB107_1: ; %atomicrmw.start 6166b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6167b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6168b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6169b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6170b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6171b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6172b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6173b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6174b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6175b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 6176b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6177b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 6178b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6179b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB107_1 6180b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6181b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6182b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6183b0a25468SMatt 
Arsenault; 6184b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret: 6185b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6186b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6187b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 4, v0 6188b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6189b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[0:1] 6190b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[4:5] 6191b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6192b0a25468SMatt Arsenault; GFX8-NEXT: .LBB107_1: ; %atomicrmw.start 6193b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6194b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6195b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6196b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6197b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6198b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6199b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6200b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6201b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6202b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 6203b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6204b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 6205b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6206b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB107_1 6207b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6208b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6209b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6210b0a25468SMatt Arsenault; 6211b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret: 6212b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6213b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6214b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] 6215b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 6216b0a25468SMatt Arsenault; GFX9-NEXT: .LBB107_1: ; %atomicrmw.start 6217b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6218b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6219b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6220b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6221b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6222b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6223b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6224b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6225b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6226b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6227b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6228b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6229b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6230b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB107_1 6231b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6232b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6233b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6234b0a25468SMatt Arsenault %tmp0 = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 6235b0a25468SMatt Arsenault ret void 6236b0a25468SMatt Arsenault} 6237b0a25468SMatt Arsenault 6238b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret_offset(ptr %out, i64 %in) { 6239b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset: 6240b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6241b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6242b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 
6243b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 6244b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 6245b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6246b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 6247b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 6248b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6249b0a25468SMatt Arsenault; GFX7-NEXT: .LBB108_1: ; %atomicrmw.start 6250b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6251b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6252b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6253b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6254b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6255b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 6256b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6257b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6258b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 6259b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 6260b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6261b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 6262b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6263b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB108_1 6264b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6265b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6266b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6267b0a25468SMatt Arsenault; 6268b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset: 6269b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6270b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6271b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 6272b0a25468SMatt 
Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 6273b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 6274b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6275b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 6276b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 6277b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6278b0a25468SMatt Arsenault; GFX8-NEXT: .LBB108_1: ; %atomicrmw.start 6279b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6280b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6281b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6282b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6283b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6284b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 6285b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6286b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6287b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 6288b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 6289b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6290b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 6291b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6292b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB108_1 6293b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6294b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6295b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6296b0a25468SMatt Arsenault; 6297b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset: 6298b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6299b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6300b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 6301b0a25468SMatt Arsenault; 
GFX9-NEXT: s_mov_b64 s[4:5], 0 6302b0a25468SMatt Arsenault; GFX9-NEXT: .LBB108_1: ; %atomicrmw.start 6303b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6304b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6305b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6306b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6307b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6308b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 6309b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6310b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6311b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6312b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6313b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6314b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6315b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6316b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB108_1 6317b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6318b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6319b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6320b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6321b0a25468SMatt Arsenault %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 6322b0a25468SMatt Arsenault ret void 6323b0a25468SMatt Arsenault} 6324b0a25468SMatt Arsenault 6325b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret(ptr %ptr, i64 %in) { 6326b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret: 6327b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6328b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6329b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v5, vcc, 4, v0 6330b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 
6331b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v4, v[0:1] 6332b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v5, v[5:6] 6333b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6334b0a25468SMatt Arsenault; GFX7-NEXT: .LBB109_1: ; %atomicrmw.start 6335b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6336b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6337b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 6338b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 6339b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6340b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6341b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6342b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6343b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6344b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6345b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6346b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6347b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6348b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB109_1 6349b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6350b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6351b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v4 6352b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v5 6353b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6354b0a25468SMatt Arsenault; 6355b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret: 6356b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6357b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6358b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 6359b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 6360b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword 
v4, v[0:1] 6361b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 6362b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6363b0a25468SMatt Arsenault; GFX8-NEXT: .LBB109_1: ; %atomicrmw.start 6364b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6365b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6366b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 6367b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 6368b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6369b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6370b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6371b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6372b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6373b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6374b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6375b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6376b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6377b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB109_1 6378b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6379b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6380b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 6381b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 6382b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6383b0a25468SMatt Arsenault; 6384b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret: 6385b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6386b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6387b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 6388b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 6389b0a25468SMatt Arsenault; GFX9-NEXT: .LBB109_1: ; %atomicrmw.start 6390b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner 
Loop Header: Depth=1 6391b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6392b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6393b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6394b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6395b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6396b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6397b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 6398b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6399b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6400b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6401b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6402b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6403b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB109_1 6404b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6405b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6406b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 6407b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 6408b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6409b0a25468SMatt Arsenault %result = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 6410b0a25468SMatt Arsenault ret i64 %result 6411b0a25468SMatt Arsenault} 6412b0a25468SMatt Arsenault 6413b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret_offset(ptr %out, i64 %in) { 6414b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset: 6415b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6416b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6417b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 6418b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6419b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 6420b0a25468SMatt Arsenault; 
GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6421b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 6422b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 6423b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6424b0a25468SMatt Arsenault; GFX7-NEXT: .LBB110_1: ; %atomicrmw.start 6425b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6426b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6427b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 6428b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 6429b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] 6430b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 6431b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 6432b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 6433b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6434b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6435b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6436b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6437b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6438b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB110_1 6439b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6440b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6441b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6442b0a25468SMatt Arsenault; 6443b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset: 6444b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6445b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6446b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 6447b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 6448b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 6449b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc 6450b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 6451b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 6452b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6453b0a25468SMatt Arsenault; GFX8-NEXT: .LBB110_1: ; %atomicrmw.start 6454b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6455b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6456b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 6457b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 6458b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] 6459b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 6460b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 6461b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 6462b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6463b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6464b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6465b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6466b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6467b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB110_1 6468b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6469b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6470b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6471b0a25468SMatt Arsenault; 6472b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset: 6473b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6474b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6475b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 6476b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 6477b0a25468SMatt Arsenault; GFX9-NEXT: .LBB110_1: ; %atomicrmw.start 6478b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 
6479b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6480b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6481b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6482b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6483b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6484b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6485b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 6486b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6487b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6488b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6489b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6490b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6491b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB110_1 6492b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6493b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 6494b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 6495b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 6496b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6497b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6498b0a25468SMatt Arsenault %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 6499b0a25468SMatt Arsenault ret i64 %result 6500b0a25468SMatt Arsenault} 6501b0a25468SMatt Arsenault 6502b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 6503b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_scalar: 6504b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6505b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6506b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 6507b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 6508b0a25468SMatt Arsenault; GFX7-NEXT: 
v_mov_b32_e32 v1, s5 6509b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 6510b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s34 6511b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s35 6512b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[0:1] 6513b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[3:4] 6514*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s4 6515b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 6516*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 6517*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 6518*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s5 6519b0a25468SMatt Arsenault; GFX7-NEXT: .LBB111_1: ; %atomicrmw.start 6520b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6521b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6522b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6523*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6524*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 6525b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 6526b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6527b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6528b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6529b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 6530b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6531b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 6532b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 6533b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB111_1 6534b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6535b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 6536b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6537b0a25468SMatt Arsenault; 6538b0a25468SMatt Arsenault; GFX8-LABEL: 
flat_atomic_umin_i64_noret_scalar: 6539b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6540b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6541b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 6542b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 6543b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 6544b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 6545b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s34 6546b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s35 6547b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[0:1] 6548b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[3:4] 6549*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s4 6550b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 6551*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 6552*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 6553*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s5 6554b0a25468SMatt Arsenault; GFX8-NEXT: .LBB111_1: ; %atomicrmw.start 6555b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6556b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6557b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6558*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6559*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 6560b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 6561b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6562b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6563b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6564b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 6565b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6566b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 6567b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 6568b0a25468SMatt Arsenault; 
GFX8-NEXT: s_cbranch_execnz .LBB111_1 6569b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6570b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 6571b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6572b0a25468SMatt Arsenault; 6573b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_scalar: 6574b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6575b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6576b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 6577b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 6578b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 6579*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 6580b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 6581*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 6582*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 6583*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 6584b0a25468SMatt Arsenault; GFX9-NEXT: .LBB111_1: ; %atomicrmw.start 6585b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6586b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6587b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6588*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6589*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 6590b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 6591b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6592b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6593b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6594b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 6595b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6596b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 6597b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 6598b0a25468SMatt Arsenault; 
GFX9-NEXT: s_cbranch_execnz .LBB111_1 6599b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6600b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 6601b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6602b0a25468SMatt Arsenault %tmp0 = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 6603b0a25468SMatt Arsenault ret void 6604b0a25468SMatt Arsenault} 6605b0a25468SMatt Arsenault 6606b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 6607b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset_scalar: 6608b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6609b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6610b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 6611b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 6612b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 6613b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 6614b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 6615b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 6616b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s34 6617b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v5, s35 6618b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[0:1] 6619b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[4:5] 6620*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 6621*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 6622*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 6623b0a25468SMatt Arsenault; GFX7-NEXT: .LBB112_1: ; %atomicrmw.start 6624b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6625b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6626b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6627*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6628*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, 
v7, v2, vcc 6629b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 6630b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6631b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6632b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6633b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 6634*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6635b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 6636*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 6637b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB112_1 6638b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6639*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 6640b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6641b0a25468SMatt Arsenault; 6642b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset_scalar: 6643b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6644b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6645b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 6646b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 6647b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 6648b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 6649b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 6650b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 6651b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s34 6652b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v5, s35 6653b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 6654b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[4:5] 6655*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 6656*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 6657*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 6658b0a25468SMatt Arsenault; GFX8-NEXT: .LBB112_1: ; %atomicrmw.start 6659b0a25468SMatt Arsenault; GFX8-NEXT: ; 
=>This Inner Loop Header: Depth=1 6660b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6661b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6662*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6663*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 6664b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 6665b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6666b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6667b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6668b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 6669*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6670b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 6671*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 6672b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB112_1 6673b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6674*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 6675b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6676b0a25468SMatt Arsenault; 6677b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset_scalar: 6678b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6679b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6680b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 6681b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 6682b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 6683*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 6684b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 6685*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 6686*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 6687*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 6688b0a25468SMatt Arsenault; GFX9-NEXT: .LBB112_1: ; %atomicrmw.start 6689b0a25468SMatt Arsenault; 
GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6690b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6691b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3] 6692*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 6693*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 6694b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 6695b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6696b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6697b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 6698b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 6699b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6700b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 6701b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 6702b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB112_1 6703b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6704b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 6705b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6706b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6707b0a25468SMatt Arsenault %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 6708b0a25468SMatt Arsenault ret void 6709b0a25468SMatt Arsenault} 6710b0a25468SMatt Arsenault 6711b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 6712b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_scalar: 6713b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6714b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6715b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 6716b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 6717b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 6718b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 
6719b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 6720b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 6721b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[0:1] 6722b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[2:3] 6723*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v2, s4 6724b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 6725*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 6726*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 6727*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v3, s5 6728b0a25468SMatt Arsenault; GFX7-NEXT: .LBB113_1: ; %atomicrmw.start 6729b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6730b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6731*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 6732*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 6733*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6734*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6735*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 6736*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 6737b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6738b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6739*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6740b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6741b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 6742b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB113_1 6743b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6744b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 6745b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6746b0a25468SMatt Arsenault; 6747b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_scalar: 6748b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6749b0a25468SMatt Arsenault; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6750b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 6751b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 6752b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 6753b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 6754b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 6755b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 6756b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[0:1] 6757b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[2:3] 6758*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s4 6759b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 6760*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 6761*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 6762*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, s5 6763b0a25468SMatt Arsenault; GFX8-NEXT: .LBB113_1: ; %atomicrmw.start 6764b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6765b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6766*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 6767*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 6768*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6769*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6770*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 6771*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 6772b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6773b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6774*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6775b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6776b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 6777b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB113_1 6778b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 
6779b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 6780b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6781b0a25468SMatt Arsenault; 6782b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_scalar: 6783b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6784b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6785b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 6786b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 6787b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 6788*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 6789b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 6790*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 6791*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 6792*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 6793b0a25468SMatt Arsenault; GFX9-NEXT: .LBB113_1: ; %atomicrmw.start 6794b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6795b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6796*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 6797*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 6798*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6799*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6800*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 6801*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 6802b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6803b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6804*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6805b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6806b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 6807b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB113_1 6808b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 
6809b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 6810b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6811b0a25468SMatt Arsenault %result = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 6812b0a25468SMatt Arsenault ret i64 %result 6813b0a25468SMatt Arsenault} 6814b0a25468SMatt Arsenault 6815b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 6816b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset_scalar: 6817b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6818b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6819b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 6820b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 6821b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 6822b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 6823b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 6824b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 6825b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 6826b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 6827b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 6828b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[2:3] 6829*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 6830*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 6831*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 6832b0a25468SMatt Arsenault; GFX7-NEXT: .LBB114_1: ; %atomicrmw.start 6833b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6834b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6835*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 6836*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 6837*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6838*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6839*eeac0ffaSNikita Popov; GFX7-NEXT: 
v_cndmask_b32_e32 v6, v5, v8, vcc 6840*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 6841b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6842b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6843*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6844*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6845*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 6846b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB114_1 6847b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6848*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 6849b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6850b0a25468SMatt Arsenault; 6851b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset_scalar: 6852b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6853b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6854b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 6855b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 6856b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 6857b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 6858b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 6859b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 6860b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 6861b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 6862b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 6863b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[2:3] 6864*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 6865*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 6866*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 6867b0a25468SMatt Arsenault; GFX8-NEXT: .LBB114_1: ; %atomicrmw.start 6868b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6869b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
6870*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 6871*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 6872*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6873*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6874*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 6875*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 6876b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6877b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6878*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6879*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6880*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 6881b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB114_1 6882b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6883*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 6884b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6885b0a25468SMatt Arsenault; 6886b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset_scalar: 6887b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6888b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6889b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 6890b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 6891b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] offset:32 6892*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 6893b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 6894*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 6895*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 6896*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 6897b0a25468SMatt Arsenault; GFX9-NEXT: .LBB114_1: ; %atomicrmw.start 6898b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6899b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
6900*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 6901*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 6902*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9] 6903*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 6904*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 6905*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc 6906b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6907b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6908*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 6909b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 6910b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 6911b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB114_1 6912b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 6913b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 6914b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6915b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 6916b0a25468SMatt Arsenault %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 6917b0a25468SMatt Arsenault ret i64 %result 6918b0a25468SMatt Arsenault} 6919b0a25468SMatt Arsenault 6920b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 6921b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory: 6922b0a25468SMatt Arsenault; GFX7: ; %bb.0: 6923b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6924b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 6925b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 6926b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 6927b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6928b0a25468SMatt Arsenault; 
GFX7-NEXT: flat_load_dword v7, v[0:1] 6929b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 6930b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 6931b0a25468SMatt Arsenault; GFX7-NEXT: .LBB115_1: ; %atomicrmw.start 6932b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 6933b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6934b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6935b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6936b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6937b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 6938b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6939b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 6940b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 6941b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 6942b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6943b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 6944b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 6945b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB115_1 6946b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 6947b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 6948b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 6949b0a25468SMatt Arsenault; 6950b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory: 6951b0a25468SMatt Arsenault; GFX8: ; %bb.0: 6952b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6953b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 6954b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 6955b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 6956b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6957b0a25468SMatt Arsenault; 
GFX8-NEXT: flat_load_dword v7, v[0:1] 6958b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 6959b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 6960b0a25468SMatt Arsenault; GFX8-NEXT: .LBB115_1: ; %atomicrmw.start 6961b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 6962b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6963b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6964b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6965b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6966b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 6967b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6968b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 6969b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 6970b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 6971b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6972b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 6973b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 6974b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB115_1 6975b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 6976b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 6977b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 6978b0a25468SMatt Arsenault; 6979b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory: 6980b0a25468SMatt Arsenault; GFX9: ; %bb.0: 6981b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6982b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 6983b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 6984b0a25468SMatt Arsenault; GFX9-NEXT: .LBB115_1: ; %atomicrmw.start 6985b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 6986b0a25468SMatt Arsenault; 
GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6987b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 6988b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 6989b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 6990b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 6991b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6992b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 6993b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 6994b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 6995b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 6996b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 6997b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 6998b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB115_1 6999b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7000b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7001b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7002b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7003b0a25468SMatt Arsenault %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 7004b0a25468SMatt Arsenault ret void 7005b0a25468SMatt Arsenault} 7006b0a25468SMatt Arsenault 7007b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 7008b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory: 7009b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7010b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7011b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 7012b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7013b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 7014b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc 7015b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 7016b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 7017b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 7018b0a25468SMatt Arsenault; GFX7-NEXT: .LBB116_1: ; %atomicrmw.start 7019b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7020b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7021b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 7022b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 7023b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] 7024b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 7025b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 7026b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 7027b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7028b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7029b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7030b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7031b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 7032b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB116_1 7033b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7034b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 7035b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7036b0a25468SMatt Arsenault; 7037b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory: 7038b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7039b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7040b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 7041b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7042b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 7043b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc 7044b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 7045b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 7046b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 7047b0a25468SMatt Arsenault; GFX8-NEXT: .LBB116_1: ; %atomicrmw.start 7048b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7049b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7050b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 7051b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 7052b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] 7053b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 7054b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 7055b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 7056b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7057b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7058b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7059b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7060b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 7061b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB116_1 7062b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7063b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 7064b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7065b0a25468SMatt Arsenault; 7066b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory: 7067b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7068b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7069b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 7070b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 7071b0a25468SMatt Arsenault; GFX9-NEXT: .LBB116_1: ; %atomicrmw.start 7072b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop 
Header: Depth=1 7073b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7074b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 7075b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 7076b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] 7077b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7078b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7079b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 7080b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7081b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7082b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7083b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7084b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 7085b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB116_1 7086b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7087b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7088b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 7089b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 7090b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7091b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7092b0a25468SMatt Arsenault %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 7093b0a25468SMatt Arsenault ret i64 %result 7094b0a25468SMatt Arsenault} 7095b0a25468SMatt Arsenault 7096b0a25468SMatt Arsenault; --------------------------------------------------------------------- 7097b0a25468SMatt Arsenault; atomicrmw min 7098b0a25468SMatt Arsenault; --------------------------------------------------------------------- 7099b0a25468SMatt Arsenault 7100b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret(ptr %ptr, i64 %in) { 7101b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret: 
7102b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7103b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7104b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 4, v0 7105b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7106b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[0:1] 7107b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[4:5] 7108b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 7109b0a25468SMatt Arsenault; GFX7-NEXT: .LBB117_1: ; %atomicrmw.start 7110b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7111b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7112b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7113b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7114b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7115b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7116b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7117b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7118b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7119b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 7120b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7121b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 7122b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 7123b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB117_1 7124b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7125b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 7126b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7127b0a25468SMatt Arsenault; 7128b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret: 7129b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7130b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7131b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 
v4, vcc, 4, v0 7132b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7133b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[0:1] 7134b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[4:5] 7135b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 7136b0a25468SMatt Arsenault; GFX8-NEXT: .LBB117_1: ; %atomicrmw.start 7137b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7138b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7139b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7140b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7141b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7142b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7143b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7144b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7145b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7146b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 7147b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7148b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 7149b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 7150b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB117_1 7151b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7152b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 7153b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7154b0a25468SMatt Arsenault; 7155b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret: 7156b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7157b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7158b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] 7159b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 7160b0a25468SMatt Arsenault; GFX9-NEXT: .LBB117_1: ; %atomicrmw.start 7161b0a25468SMatt Arsenault; 
GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7162b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7163b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7164b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7165b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7166b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7167b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7168b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7169b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7170b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 7171b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7172b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 7173b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 7174b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB117_1 7175b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7176b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7177b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7178b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 7179b0a25468SMatt Arsenault ret void 7180b0a25468SMatt Arsenault} 7181b0a25468SMatt Arsenault 7182b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret_offset(ptr %out, i64 %in) { 7183b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset: 7184b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7185b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7186b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 7187b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 7188b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 7189b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7190b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, 
v[0:1] 7191b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 7192b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 7193b0a25468SMatt Arsenault; GFX7-NEXT: .LBB118_1: ; %atomicrmw.start 7194b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7195b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7196b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7197b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7198b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7199b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 7200b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7201b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7202b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 7203b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 7204b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7205b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 7206b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 7207b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB118_1 7208b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7209b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 7210b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7211b0a25468SMatt Arsenault; 7212b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset: 7213b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7214b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7215b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 7216b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 7217b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 7218b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7219b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 7220b0a25468SMatt 
Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 7221b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 7222b0a25468SMatt Arsenault; GFX8-NEXT: .LBB118_1: ; %atomicrmw.start 7223b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7224b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7225b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7226b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7227b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7228b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 7229b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7230b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7231b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 7232b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 7233b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7234b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 7235b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 7236b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB118_1 7237b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7238b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 7239b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7240b0a25468SMatt Arsenault; 7241b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset: 7242b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7243b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7244b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 7245b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 7246b0a25468SMatt Arsenault; GFX9-NEXT: .LBB118_1: ; %atomicrmw.start 7247b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7248b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7249b0a25468SMatt Arsenault; GFX9-NEXT: 
v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7250b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7251b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7252b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 7253b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7254b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7255b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7256b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 7257b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7258b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 7259b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 7260b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB118_1 7261b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7262b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7263b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7264b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7265b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 7266b0a25468SMatt Arsenault ret void 7267b0a25468SMatt Arsenault} 7268b0a25468SMatt Arsenault 7269b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret(ptr %ptr, i64 %in) { 7270b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret: 7271b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7272b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7273b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v5, vcc, 4, v0 7274b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 7275b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v4, v[0:1] 7276b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v5, v[5:6] 7277b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 7278b0a25468SMatt Arsenault; GFX7-NEXT: .LBB119_1: ; %atomicrmw.start 7279b0a25468SMatt Arsenault; GFX7-NEXT: 
; =>This Inner Loop Header: Depth=1 7280b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7281b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v5 7282b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v4 7283b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7284b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7285b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7286b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7287b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7288b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7289b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7290b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7291b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 7292b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB119_1 7293b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7294b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 7295b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, v4 7296b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, v5 7297b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7298b0a25468SMatt Arsenault; 7299b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret: 7300b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7301b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7302b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v5, vcc, 4, v0 7303b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc 7304b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v4, v[0:1] 7305b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v5, v[5:6] 7306b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 7307b0a25468SMatt Arsenault; GFX8-NEXT: .LBB119_1: ; %atomicrmw.start 7308b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7309b0a25468SMatt 
Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7310b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v5 7311b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v4 7312b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7313b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7314b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7315b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7316b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7317b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7318b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7319b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7320b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 7321b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB119_1 7322b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7323b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 7324b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, v4 7325b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, v5 7326b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7327b0a25468SMatt Arsenault; 7328b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret: 7329b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7330b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7331b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 7332b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 7333b0a25468SMatt Arsenault; GFX9-NEXT: .LBB119_1: ; %atomicrmw.start 7334b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7335b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7336b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 7337b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 7338b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 
7339b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7340b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7341b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc 7342b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7343b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7344b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7345b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7346b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 7347b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB119_1 7348b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7349b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7350b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 7351b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 7352b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7353b0a25468SMatt Arsenault %result = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 7354b0a25468SMatt Arsenault ret i64 %result 7355b0a25468SMatt Arsenault} 7356b0a25468SMatt Arsenault 7357b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret_offset(ptr %out, i64 %in) { 7358b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset: 7359b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7360b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7361b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 7362b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7363b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 7364b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7365b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 7366b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 7367b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 7368b0a25468SMatt Arsenault; GFX7-NEXT: .LBB120_1: ; 
%atomicrmw.start 7369b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7370b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7371b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 7372b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 7373b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] 7374b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 7375b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 7376b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 7377b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7378b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7379b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7380b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7381b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 7382b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB120_1 7383b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7384b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 7385b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7386b0a25468SMatt Arsenault; 7387b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset: 7388b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7389b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7390b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 7391b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 7392b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 7393b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7394b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 7395b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 7396b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 7397b0a25468SMatt Arsenault; GFX8-NEXT: .LBB120_1: ; %atomicrmw.start 
7398b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7399b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7400b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 7401b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 7402b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] 7403b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 7404b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 7405b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 7406b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7407b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7408b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7409b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7410b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 7411b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB120_1 7412b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7413b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 7414b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7415b0a25468SMatt Arsenault; 7416b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset: 7417b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7418b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7419b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 7420b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 7421b0a25468SMatt Arsenault; GFX9-NEXT: .LBB120_1: ; %atomicrmw.start 7422b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7423b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7424b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 7425b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 7426b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 7427b0a25468SMatt 
Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 7428b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 7429b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 7430b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7431b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7432b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 7433b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 7434b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 7435b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB120_1 7436b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7437b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 7438b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 7439b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 7440b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7441b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7442b0a25468SMatt Arsenault %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 7443b0a25468SMatt Arsenault ret i64 %result 7444b0a25468SMatt Arsenault} 7445b0a25468SMatt Arsenault 7446b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 7447b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_scalar: 7448b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7449b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7450b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 7451b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 7452b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 7453b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 7454b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s34 7455b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s35 7456b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[0:1] 
7457b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[3:4] 7458*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s4 7459b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 7460*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 7461*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 7462*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s5 7463b0a25468SMatt Arsenault; GFX7-NEXT: .LBB121_1: ; %atomicrmw.start 7464b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7465b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7466b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7467*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7468*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7469b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7470b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7471b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7472b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7473b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 7474b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7475b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 7476b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 7477b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB121_1 7478b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7479b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 7480b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7481b0a25468SMatt Arsenault; 7482b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_scalar: 7483b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7484b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7485b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 7486b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 
7487b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 7488b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 7489b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s34 7490b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s35 7491b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[0:1] 7492b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[3:4] 7493*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s4 7494b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 7495*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 7496*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 7497*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s5 7498b0a25468SMatt Arsenault; GFX8-NEXT: .LBB121_1: ; %atomicrmw.start 7499b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7500b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7501b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7502*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7503*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7504b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7505b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7506b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7507b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7508b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 7509b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7510b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 7511b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 7512b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB121_1 7513b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7514b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 7515b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7516b0a25468SMatt Arsenault; 7517b0a25468SMatt 
Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_scalar: 7518b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7519b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7520b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 7521b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 7522b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 7523*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 7524b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 7525*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 7526*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 7527*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 7528b0a25468SMatt Arsenault; GFX9-NEXT: .LBB121_1: ; %atomicrmw.start 7529b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7530b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7531b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7532*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7533*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7534b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7535b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7536b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7537b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7538b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 7539b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7540b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 7541b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 7542b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB121_1 7543b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7544b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 7545b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7546b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr 
%ptr, i64 %in seq_cst, !noalias.addrspace !1 7547b0a25468SMatt Arsenault ret void 7548b0a25468SMatt Arsenault} 7549b0a25468SMatt Arsenault 7550b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 7551b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset_scalar: 7552b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7553b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7554b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 7555b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 7556b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 7557b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 7558b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 7559b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 7560b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v4, s34 7561b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v5, s35 7562b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v3, v[0:1] 7563b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v2, v[4:5] 7564*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 7565*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s7 7566*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s6 7567b0a25468SMatt Arsenault; GFX7-NEXT: .LBB122_1: ; %atomicrmw.start 7568b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7569b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7570b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7571*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7572*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7573b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7574b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7575b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7576b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, 
v[0:1], v[2:3] 7577b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 7578*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7579b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 7580*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 7581b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB122_1 7582b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7583*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 7584b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7585b0a25468SMatt Arsenault; 7586b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset_scalar: 7587b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7588b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7589b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 7590b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 7591b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 7592b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 7593b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 7594b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 7595b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v4, s34 7596b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v5, s35 7597b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v3, v[0:1] 7598b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v2, v[4:5] 7599*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 7600*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s7 7601*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s6 7602b0a25468SMatt Arsenault; GFX8-NEXT: .LBB122_1: ; %atomicrmw.start 7603b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7604b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7605b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7606*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7607*eeac0ffaSNikita Popov; GFX8-NEXT: 
v_cndmask_b32_e32 v0, v7, v2, vcc 7608b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7609b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7610b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7611b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7612b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 7613*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7614b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 7615*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 7616b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB122_1 7617b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7618*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 7619b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7620b0a25468SMatt Arsenault; 7621b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset_scalar: 7622b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7623b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7624b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 7625b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 7626b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 7627*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s4 7628b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 7629*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s7 7630*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s6 7631*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s5 7632b0a25468SMatt Arsenault; GFX9-NEXT: .LBB122_1: ; %atomicrmw.start 7633b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7634b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7635b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3] 7636*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7637*eeac0ffaSNikita Popov; 
GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7638b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 7639b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7640b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7641b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7642b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 7643b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7644b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 7645b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 7646b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB122_1 7647b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7648b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 7649b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7650b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7651b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 7652b0a25468SMatt Arsenault ret void 7653b0a25468SMatt Arsenault} 7654b0a25468SMatt Arsenault 7655b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 7656b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_scalar: 7657b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7658b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7659b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s4 7660b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 4 7661b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s5 7662b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 7663b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 7664b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 7665b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[0:1] 7666b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[2:3] 7667*eeac0ffaSNikita Popov; GFX7-NEXT: 
v_mov_b32_e32 v2, s4 7668b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[34:35], 0 7669*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 7670*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 7671*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v3, s5 7672b0a25468SMatt Arsenault; GFX7-NEXT: .LBB123_1: ; %atomicrmw.start 7673b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7674b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7675*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 7676*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 7677*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7678*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7679*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 7680*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 7681b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7682b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7683*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7684b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7685b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 7686b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB123_1 7687b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7688b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 7689b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7690b0a25468SMatt Arsenault; 7691b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_scalar: 7692b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7693b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7694b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s4 7695b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 4 7696b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s5 7697b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 
7698b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 7699b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 7700b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[0:1] 7701b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[2:3] 7702*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v2, s4 7703b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[34:35], 0 7704*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 7705*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 7706*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v3, s5 7707b0a25468SMatt Arsenault; GFX8-NEXT: .LBB123_1: ; %atomicrmw.start 7708b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7709b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7710*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 7711*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 7712*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7713*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7714*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 7715*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 7716b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7717b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7718*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7719b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7720b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 7721b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB123_1 7722b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7723b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 7724b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7725b0a25468SMatt Arsenault; 7726b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_scalar: 7727b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7728b0a25468SMatt Arsenault; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7729b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 7730b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 7731b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 7732*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 7733b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 7734*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 7735*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 7736*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 7737b0a25468SMatt Arsenault; GFX9-NEXT: .LBB123_1: ; %atomicrmw.start 7738b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7739b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7740*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 7741*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 7742*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7743*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7744*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 7745*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 7746b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7747b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7748*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7749b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7750b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 7751b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB123_1 7752b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7753b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 7754b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7755b0a25468SMatt Arsenault %result = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 7756b0a25468SMatt Arsenault ret i64 %result 7757b0a25468SMatt Arsenault} 
7758b0a25468SMatt Arsenault 7759b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 7760b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset_scalar: 7761b0a25468SMatt Arsenault; GFX7: ; %bb.0: 7762b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7763b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 7764b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 7765b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s36, s4, 36 7766b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s37, s5, 0 7767b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s36 7768b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s37 7769b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 7770b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 7771b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 7772b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[2:3] 7773*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[34:35], 0 7774*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s7 7775*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s6 7776b0a25468SMatt Arsenault; GFX7-NEXT: .LBB124_1: ; %atomicrmw.start 7777b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7778b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7779*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v1 7780*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v0 7781*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7782*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7783*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 7784*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 7785b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7786b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7787*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 
vcc, v[0:1], v[8:9] 7788*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7789*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[34:35] 7790b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB124_1 7791b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7792*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[34:35] 7793b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 7794b0a25468SMatt Arsenault; 7795b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset_scalar: 7796b0a25468SMatt Arsenault; GFX8: ; %bb.0: 7797b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7798b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 7799b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 7800b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s36, s4, 36 7801b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s37, s5, 0 7802b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s36 7803b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s37 7804b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 7805b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 7806b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 7807b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[2:3] 7808*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[34:35], 0 7809*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s7 7810*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s6 7811b0a25468SMatt Arsenault; GFX8-NEXT: .LBB124_1: ; %atomicrmw.start 7812b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7813b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7814*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v1 7815*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v0 7816*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7817*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7818*eeac0ffaSNikita Popov; GFX8-NEXT: 
v_cndmask_b32_e32 v6, v5, v8, vcc 7819*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc 7820b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7821b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7822*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7823*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7824*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[34:35] 7825b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB124_1 7826b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7827*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[34:35] 7828b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 7829b0a25468SMatt Arsenault; 7830b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset_scalar: 7831b0a25468SMatt Arsenault; GFX9: ; %bb.0: 7832b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7833b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s4 7834b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s5 7835b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[0:1], v[0:1] offset:32 7836*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v2, s4 7837b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[34:35], 0 7838*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s7 7839*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s6 7840*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v3, s5 7841b0a25468SMatt Arsenault; GFX9-NEXT: .LBB124_1: ; %atomicrmw.start 7842b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7843b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7844*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v1 7845*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v0 7846*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9] 7847*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7848*eeac0ffaSNikita Popov; GFX9-NEXT: 
v_cndmask_b32_e32 v6, v5, v8, vcc 7849*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc 7850b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7851b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7852*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 7853b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[34:35], vcc, s[34:35] 7854b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[34:35] 7855b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB124_1 7856b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7857b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] 7858b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 7859b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 7860b0a25468SMatt Arsenault %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 7861b0a25468SMatt Arsenault ret i64 %result 7862b0a25468SMatt Arsenault} 7863b0a25468SMatt Arsenault 7864b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %index) { 7865b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_addr64_offset: 7866b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 78676548b635SShilei Tian; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd 78686548b635SShilei Tian; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 7869b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 78706548b635SShilei Tian; GFX7-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 7871b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s4 7872b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s5 7873b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 7874b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 7875*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s1 7876*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s0 7877*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 7878*eeac0ffaSNikita Popov; 
GFX7-NEXT: s_mov_b64 s[0:1], 0 7879*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s3 7880*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s2 7881b0a25468SMatt Arsenault; GFX7-NEXT: .LBB125_1: ; %atomicrmw.start 7882b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7883b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7884b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 7885*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7886*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7887b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7888b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7889b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7890b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7891b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 7892*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 7893b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 7894*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 7895b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB125_1 7896b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 7897b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 7898b0a25468SMatt Arsenault; 7899b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_addr64_offset: 7900b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 79016548b635SShilei Tian; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 79026548b635SShilei Tian; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7903b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 79046548b635SShilei Tian; GFX8-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 7905b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s4 7906b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s5 7907b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 7908b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 
7909*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s1 7910*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s0 7911*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[4:5] 7912*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 7913*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s3 7914*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s2 7915b0a25468SMatt Arsenault; GFX8-NEXT: .LBB125_1: ; %atomicrmw.start 7916b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 7917b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7918b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 7919*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7920*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7921b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 7922b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7923b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 7924b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7925b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 7926*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 7927b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 7928*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 7929b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB125_1 7930b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 7931b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 7932b0a25468SMatt Arsenault; 7933b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_addr64_offset: 7934b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 79356548b635SShilei Tian; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 79366548b635SShilei Tian; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7937b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 79386548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 79396548b635SShilei Tian; 
GFX9-NEXT: s_add_u32 s0, s0, s4 79406548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s1, s5 7941*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s1 7942*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s0 7943*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[4:5] offset:32 7944*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 7945*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s3 7946*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s2 7947b0a25468SMatt Arsenault; GFX9-NEXT: .LBB125_1: ; %atomicrmw.start 7948b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 7949b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 79506548b635SShilei Tian; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 7951*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 7952*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 7953b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc 7954b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7955b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 7956b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 7957b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 7958*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 7959b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 7960*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 7961b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB125_1 7962b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 7963b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 7964b0a25468SMatt Arsenaultentry: 7965b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 7966b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 7967b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 7968b0a25468SMatt Arsenault ret void 7969b0a25468SMatt Arsenault} 
7970b0a25468SMatt Arsenault 7971b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) { 7972b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_ret_addr64_offset: 7973b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 79746548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7975b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7976b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 7977b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 7978b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 7979b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, 32 7980b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, 0 7981b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 7982b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 7983*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 7984*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 7985*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 7986*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 7987b0a25468SMatt Arsenault; GFX7-NEXT: .LBB126_1: ; %atomicrmw.start 7988b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 7989b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7990*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 7991*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 7992*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9] 7993*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 7994*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 7995*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 7996b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7997b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 7998*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 7999*eeac0ffaSNikita Popov; GFX7-NEXT: 
s_or_b64 s[0:1], vcc, s[0:1] 8000*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 8001b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB126_1 8002b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 8003*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 8004*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, s2 8005*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 8006*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8007b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 8008b0a25468SMatt Arsenault; 8009b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_ret_addr64_offset: 8010b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 80116548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 8012b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 8013b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 8014b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 8015b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 8016b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, 32 8017b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, 0 8018b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 8019b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 8020*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8021*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 8022*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 8023*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s4 8024b0a25468SMatt Arsenault; GFX8-NEXT: .LBB126_1: ; %atomicrmw.start 8025b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 8026b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8027*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 8028*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v2 8029*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9] 8030*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, 
vcc 8031*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 8032*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 8033b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8034b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8035*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 8036*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 8037*eeac0ffaSNikita Popov; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1] 8038b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB126_1 8039b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 8040*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 8041*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 8042*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 8043*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8044b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 8045b0a25468SMatt Arsenault; 8046b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_ret_addr64_offset: 8047b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 80486548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 8049b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 80506548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 80516548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 80526548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 8053b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 8054b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 8055*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:32 8056*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 8057*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 8058*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 8059b0a25468SMatt Arsenault; GFX9-NEXT: .LBB126_1: ; %atomicrmw.start 8060b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 8061b0a25468SMatt Arsenault; GFX9-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 8062*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 8063*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 8064*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9] 8065*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 8066*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 8067*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc 8068b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8069b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8070*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 8071*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 8072*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 8073b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB126_1 8074b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 8075*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 8076*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 8077*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 8078*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8079b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 8080b0a25468SMatt Arsenaultentry: 8081b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 8082b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %ptr, i64 4 8083b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8084b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 8085b0a25468SMatt Arsenault ret void 8086b0a25468SMatt Arsenault} 8087b0a25468SMatt Arsenault 8088b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) { 8089b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64: 8090b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 80916548b635SShilei Tian; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8092b0a25468SMatt Arsenault; GFX7-NEXT: 
s_mov_b64 s[4:5], 0 8093b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8094b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 8095b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 8096b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8097*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s1 8098*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v6, s3 8099*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v7, s2 8100*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s0 8101b0a25468SMatt Arsenault; GFX7-NEXT: .LBB127_1: ; %atomicrmw.start 8102b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 8103b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8104b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 8105*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 8106*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 8107b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 8108b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8109b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8110b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 8111b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, v1 8112b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8113b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, v0 8114b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 8115b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB127_1 8116b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 8117b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 8118b0a25468SMatt Arsenault; 8119b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64: 8120b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 81216548b635SShilei Tian; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8122b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 8123b0a25468SMatt Arsenault; 
GFX8-NEXT: s_waitcnt lgkmcnt(0) 8124b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 8125b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 8126b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8127*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s1 8128*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v6, s3 8129*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v7, s2 8130*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s0 8131b0a25468SMatt Arsenault; GFX8-NEXT: .LBB127_1: ; %atomicrmw.start 8132b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 8133b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8134b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 8135*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 8136*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 8137b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 8138b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8139b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8140b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 8141b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, v1 8142b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8143b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, v0 8144b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 8145b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB127_1 8146b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 8147b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 8148b0a25468SMatt Arsenault; 8149b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64: 8150b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 81516548b635SShilei Tian; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 81526548b635SShilei Tian; GFX9-NEXT: s_mov_b64 s[4:5], 0 8153b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 81546548b635SShilei 
Tian; GFX9-NEXT: v_mov_b32_e32 v0, s0 81556548b635SShilei Tian; GFX9-NEXT: v_mov_b32_e32 v1, s1 8156b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8157*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s1 8158*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v6, s3 8159*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v7, s2 8160*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s0 8161b0a25468SMatt Arsenault; GFX9-NEXT: .LBB127_1: ; %atomicrmw.start 8162b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 8163b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 81646548b635SShilei Tian; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] 8165*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc 8166*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 8167b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 8168b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8169b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8170b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 8171b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, v1 81726548b635SShilei Tian; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8173b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, v0 81746548b635SShilei Tian; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 8175b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB127_1 8176b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 8177b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 8178b0a25468SMatt Arsenaultentry: 8179b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %out, i64 %in seq_cst, !noalias.addrspace !1 8180b0a25468SMatt Arsenault ret void 8181b0a25468SMatt Arsenault} 8182b0a25468SMatt Arsenault 8183b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) { 8184b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_ret_addr64: 
8185b0a25468SMatt Arsenault; GFX7: ; %bb.0: ; %entry 81866548b635SShilei Tian; GFX7-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 8187b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8188b0a25468SMatt Arsenault; GFX7-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 8189b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s0, s0, s6 8190b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s1, s1, s7 8191b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s0 8192b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s1 8193*eeac0ffaSNikita Popov; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8194*eeac0ffaSNikita Popov; GFX7-NEXT: s_mov_b64 s[0:1], 0 8195*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v4, s5 8196*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v5, s4 8197b0a25468SMatt Arsenault; GFX7-NEXT: .LBB128_1: ; %atomicrmw.start 8198b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 8199b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8200*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v9, v3 8201*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v8, v2 8202*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9] 8203*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 8204*eeac0ffaSNikita Popov; GFX7-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 8205*eeac0ffaSNikita Popov; GFX7-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 8206b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8207b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8208*eeac0ffaSNikita Popov; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 8209*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 8210*eeac0ffaSNikita Popov; GFX7-NEXT: s_andn2_b64 exec, exec, s[0:1] 8211b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB128_1 8212b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 8213*eeac0ffaSNikita Popov; GFX7-NEXT: s_or_b64 exec, exec, s[0:1] 8214*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v0, 
s2 8215*eeac0ffaSNikita Popov; GFX7-NEXT: v_mov_b32_e32 v1, s3 8216*eeac0ffaSNikita Popov; GFX7-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8217b0a25468SMatt Arsenault; GFX7-NEXT: s_endpgm 8218b0a25468SMatt Arsenault; 8219b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_ret_addr64: 8220b0a25468SMatt Arsenault; GFX8: ; %bb.0: ; %entry 82216548b635SShilei Tian; GFX8-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 8222b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt lgkmcnt(0) 8223b0a25468SMatt Arsenault; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 3 8224b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s0, s0, s6 8225b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s1, s1, s7 8226b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s0 8227b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s1 8228*eeac0ffaSNikita Popov; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8229*eeac0ffaSNikita Popov; GFX8-NEXT: s_mov_b64 s[0:1], 0 8230*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v4, s5 8231*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v5, s4 8232b0a25468SMatt Arsenault; GFX8-NEXT: .LBB128_1: ; %atomicrmw.start 8233b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 8234b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8235*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v9, v3 8236*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v8, v2 8237*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9] 8238*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 8239*eeac0ffaSNikita Popov; GFX8-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 8240*eeac0ffaSNikita Popov; GFX8-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 8241b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8242b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8243*eeac0ffaSNikita Popov; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 8244*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 8245*eeac0ffaSNikita Popov; GFX8-NEXT: 
s_andn2_b64 exec, exec, s[0:1] 8246b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB128_1 8247b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 8248*eeac0ffaSNikita Popov; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] 8249*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v0, s2 8250*eeac0ffaSNikita Popov; GFX8-NEXT: v_mov_b32_e32 v1, s3 8251*eeac0ffaSNikita Popov; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8252b0a25468SMatt Arsenault; GFX8-NEXT: s_endpgm 8253b0a25468SMatt Arsenault; 8254b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_ret_addr64: 8255b0a25468SMatt Arsenault; GFX9: ; %bb.0: ; %entry 82566548b635SShilei Tian; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 8257b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt lgkmcnt(0) 82586548b635SShilei Tian; GFX9-NEXT: s_lshl_b64 s[0:1], s[14:15], 3 82596548b635SShilei Tian; GFX9-NEXT: s_add_u32 s0, s8, s0 82606548b635SShilei Tian; GFX9-NEXT: s_addc_u32 s1, s9, s1 8261b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s0 8262b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s1 8263*eeac0ffaSNikita Popov; GFX9-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 8264*eeac0ffaSNikita Popov; GFX9-NEXT: s_mov_b64 s[0:1], 0 8265*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v4, s13 8266*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v5, s12 8267b0a25468SMatt Arsenault; GFX9-NEXT: .LBB128_1: ; %atomicrmw.start 8268b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 8269b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8270*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v9, v3 8271*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v8, v2 8272*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9] 8273*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v7, v4, v9, vcc 8274*eeac0ffaSNikita Popov; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v8, vcc 8275*eeac0ffaSNikita Popov; GFX9-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc 8276b0a25468SMatt Arsenault; 
GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8277b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8278*eeac0ffaSNikita Popov; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] 8279*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 8280*eeac0ffaSNikita Popov; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1] 8281b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB128_1 8282b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 8283*eeac0ffaSNikita Popov; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] 8284*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v0, s10 8285*eeac0ffaSNikita Popov; GFX9-NEXT: v_mov_b32_e32 v1, s11 8286*eeac0ffaSNikita Popov; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 8287b0a25468SMatt Arsenault; GFX9-NEXT: s_endpgm 8288b0a25468SMatt Arsenaultentry: 8289b0a25468SMatt Arsenault %ptr = getelementptr i64, ptr %out, i64 %index 8290b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8291b0a25468SMatt Arsenault store i64 %tmp0, ptr %out2 8292b0a25468SMatt Arsenault ret void 8293b0a25468SMatt Arsenault} 8294b0a25468SMatt Arsenault 8295b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 8296b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory: 8297b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8298b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8299b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v8, vcc, 32, v0 8300b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 8301b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 8302b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8303b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v7, v[0:1] 8304b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v6, v[8:9] 8305b0a25468SMatt Arsenault; GFX7-NEXT: s_mov_b64 s[4:5], 0 8306b0a25468SMatt Arsenault; 
GFX7-NEXT: .LBB129_1: ; %atomicrmw.start 8307b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 8308b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8309b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 8310b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 8311b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 8312b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 8313b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8314b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8315b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 8316b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v7, v1 8317b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8318b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v6, v0 8319b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 8320b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB129_1 8321b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 8322b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 8323b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8324b0a25468SMatt Arsenault; 8325b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory: 8326b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8327b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8328b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v8, vcc, 32, v0 8329b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 8330b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 8331b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8332b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v7, v[0:1] 8333b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v6, v[8:9] 8334b0a25468SMatt Arsenault; GFX8-NEXT: s_mov_b64 s[4:5], 0 8335b0a25468SMatt Arsenault; 
GFX8-NEXT: .LBB129_1: ; %atomicrmw.start 8336b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 8337b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8338b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 8339b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 8340b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 8341b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc 8342b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8343b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8344b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7] 8345b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v7, v1 8346b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8347b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v6, v0 8348b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 8349b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB129_1 8350b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 8351b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 8352b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8353b0a25468SMatt Arsenault; 8354b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory: 8355b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8356b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8357b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[6:7], v[0:1] offset:32 8358b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 8359b0a25468SMatt Arsenault; GFX9-NEXT: .LBB129_1: ; %atomicrmw.start 8360b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 8361b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8362b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 8363b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 
8364b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 8365b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 8366b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8367b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8368b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 8369b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 8370b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8371b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v6, v4 8372b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 8373b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB129_1 8374b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 8375b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 8376b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8377b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8378b0a25468SMatt Arsenault %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 8379b0a25468SMatt Arsenault ret void 8380b0a25468SMatt Arsenault} 8381b0a25468SMatt Arsenault 8382b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 8383b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory: 8384b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8385b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8386b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v4, vcc, 32, v0 8387b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 8388b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 36, v0 8389b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8390b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v1, v[0:1] 8391b0a25468SMatt Arsenault; GFX7-NEXT: flat_load_dword v0, v[4:5] 8392b0a25468SMatt Arsenault; GFX7-NEXT: 
s_mov_b64 s[4:5], 0 8393b0a25468SMatt Arsenault; GFX7-NEXT: .LBB130_1: ; %atomicrmw.start 8394b0a25468SMatt Arsenault; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 8395b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8396b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v9, v1 8397b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v8, v0 8398b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] 8399b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 8400b0a25468SMatt Arsenault; GFX7-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 8401b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 8402b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8403b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8404b0a25468SMatt Arsenault; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 8405b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8406b0a25468SMatt Arsenault; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5] 8407b0a25468SMatt Arsenault; GFX7-NEXT: s_cbranch_execnz .LBB130_1 8408b0a25468SMatt Arsenault; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end 8409b0a25468SMatt Arsenault; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 8410b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8411b0a25468SMatt Arsenault; 8412b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory: 8413b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8414b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8415b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v4, vcc, 32, v0 8416b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 8417b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 36, v0 8418b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8419b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v1, v[0:1] 8420b0a25468SMatt Arsenault; GFX8-NEXT: flat_load_dword v0, v[4:5] 8421b0a25468SMatt Arsenault; GFX8-NEXT: 
s_mov_b64 s[4:5], 0 8422b0a25468SMatt Arsenault; GFX8-NEXT: .LBB130_1: ; %atomicrmw.start 8423b0a25468SMatt Arsenault; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 8424b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8425b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v9, v1 8426b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v8, v0 8427b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] 8428b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc 8429b0a25468SMatt Arsenault; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc 8430b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc 8431b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8432b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8433b0a25468SMatt Arsenault; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] 8434b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8435b0a25468SMatt Arsenault; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] 8436b0a25468SMatt Arsenault; GFX8-NEXT: s_cbranch_execnz .LBB130_1 8437b0a25468SMatt Arsenault; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end 8438b0a25468SMatt Arsenault; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 8439b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8440b0a25468SMatt Arsenault; 8441b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory: 8442b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8443b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8444b0a25468SMatt Arsenault; GFX9-NEXT: flat_load_dwordx2 v[4:5], v[0:1] offset:32 8445b0a25468SMatt Arsenault; GFX9-NEXT: s_mov_b64 s[4:5], 0 8446b0a25468SMatt Arsenault; GFX9-NEXT: .LBB130_1: ; %atomicrmw.start 8447b0a25468SMatt Arsenault; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 8448b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8449b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v7, v5 8450b0a25468SMatt Arsenault; GFX9-NEXT: 
v_mov_b32_e32 v6, v4 8451b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] 8452b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc 8453b0a25468SMatt Arsenault; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc 8454b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc 8455b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8456b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8457b0a25468SMatt Arsenault; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] 8458b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 8459b0a25468SMatt Arsenault; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 8460b0a25468SMatt Arsenault; GFX9-NEXT: s_cbranch_execnz .LBB130_1 8461b0a25468SMatt Arsenault; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end 8462b0a25468SMatt Arsenault; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 8463b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, v4 8464b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, v5 8465b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8466b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8467b0a25468SMatt Arsenault %result = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 8468b0a25468SMatt Arsenault ret i64 %result 8469b0a25468SMatt Arsenault} 8470b0a25468SMatt Arsenault 8471b0a25468SMatt Arsenault; --------------------------------------------------------------------- 8472b0a25468SMatt Arsenault; atomicrmw uinc_wrap 8473b0a25468SMatt Arsenault; --------------------------------------------------------------------- 8474b0a25468SMatt Arsenault 8475b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret(ptr %ptr, i64 %in) { 8476b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret: 8477b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8478b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8479b0a25468SMatt Arsenault; GFX7-NEXT: 
flat_atomic_inc_x2 v[0:1], v[2:3] 8480b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8481b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8482b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8483b0a25468SMatt Arsenault; 8484b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret: 8485b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8486b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8487b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8488b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8489b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8490b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8491b0a25468SMatt Arsenault; 8492b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret: 8493b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8494b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8495b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8496b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8497b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8498b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8499b0a25468SMatt Arsenault %tmp0 = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8500b0a25468SMatt Arsenault ret void 8501b0a25468SMatt Arsenault} 8502b0a25468SMatt Arsenault 8503b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret_offset(ptr %out, i64 %in) { 8504b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset: 8505b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8506b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8507b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8508b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8509b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8510b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
8511b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8512b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8513b0a25468SMatt Arsenault; 8514b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset: 8515b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8516b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8517b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 8518b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8519b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8520b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8521b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8522b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8523b0a25468SMatt Arsenault; 8524b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset: 8525b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8526b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8527b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] offset:32 8528b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8529b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8530b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8531b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8532b0a25468SMatt Arsenault %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8533b0a25468SMatt Arsenault ret void 8534b0a25468SMatt Arsenault} 8535b0a25468SMatt Arsenault 8536b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret(ptr %ptr, i64 %in) { 8537b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret: 8538b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8539b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8540b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8541b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8542b0a25468SMatt 
Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8543b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8544b0a25468SMatt Arsenault; 8545b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret: 8546b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8547b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8548b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8549b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8550b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8551b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8552b0a25468SMatt Arsenault; 8553b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret: 8554b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8555b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8556b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8557b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8558b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8559b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8560b0a25468SMatt Arsenault %result = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8561b0a25468SMatt Arsenault ret i64 %result 8562b0a25468SMatt Arsenault} 8563b0a25468SMatt Arsenault 8564b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret_offset(ptr %out, i64 %in) { 8565b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset: 8566b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8567b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8568b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8569b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8570b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8571b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8572b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8573b0a25468SMatt 
Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8574b0a25468SMatt Arsenault; 8575b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset: 8576b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8577b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8578b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 8579b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8580b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8581b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8582b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8583b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8584b0a25468SMatt Arsenault; 8585b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset: 8586b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8587b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8588b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 8589b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8590b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8591b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8592b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8593b0a25468SMatt Arsenault %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8594b0a25468SMatt Arsenault ret i64 %result 8595b0a25468SMatt Arsenault} 8596b0a25468SMatt Arsenault 8597b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 8598b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar: 8599b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8600b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8601b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 8602b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 8603b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, 
s4 8604b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 8605b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] 8606b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8607b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8608b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8609b0a25468SMatt Arsenault; 8610b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar: 8611b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8612b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8613b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 8614b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 8615b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 8616b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 8617b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] 8618b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8619b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8620b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8621b0a25468SMatt Arsenault; 8622b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar: 8623b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8624b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8625b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 8626b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 8627b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 8628b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 8629b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] 8630b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8631b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8632b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8633b0a25468SMatt Arsenault %tmp0 = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8634b0a25468SMatt Arsenault ret void 8635b0a25468SMatt Arsenault} 
8636b0a25468SMatt Arsenault 8637b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 8638b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar: 8639b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8640b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8641b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 8642b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 8643b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 8644b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 8645b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 8646b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 8647b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] 8648b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8649b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8650b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8651b0a25468SMatt Arsenault; 8652b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar: 8653b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8654b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8655b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 8656b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 8657b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 8658b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 8659b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 8660b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 8661b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] 8662b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8663b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8664b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8665b0a25468SMatt Arsenault; 8666b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar: 
8667b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8668b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8669b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 8670b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 8671b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 8672b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 8673b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32 8674b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8675b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8676b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8677b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8678b0a25468SMatt Arsenault %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8679b0a25468SMatt Arsenault ret void 8680b0a25468SMatt Arsenault} 8681b0a25468SMatt Arsenault 8682b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 8683b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar: 8684b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8685b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8686b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 8687b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 8688b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 8689b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 8690b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc 8691b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8692b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8693b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8694b0a25468SMatt Arsenault; 8695b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar: 8696b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8697b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
8698b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 8699b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 8700b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 8701b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 8702b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc 8703b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8704b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8705b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8706b0a25468SMatt Arsenault; 8707b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar: 8708b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8709b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8710b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 8711b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 8712b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 8713b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 8714b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc 8715b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8716b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8717b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8718b0a25468SMatt Arsenault %result = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8719b0a25468SMatt Arsenault ret i64 %result 8720b0a25468SMatt Arsenault} 8721b0a25468SMatt Arsenault 8722b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 8723b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar: 8724b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8725b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8726b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 8727b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 8728b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, 
s34 8729b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 8730b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 8731b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 8732b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc 8733b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8734b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8735b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8736b0a25468SMatt Arsenault; 8737b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar: 8738b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8739b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8740b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 8741b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 8742b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 8743b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 8744b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 8745b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 8746b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc 8747b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8748b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8749b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8750b0a25468SMatt Arsenault; 8751b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar: 8752b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8753b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8754b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 8755b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 8756b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 8757b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 8758b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 8759b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
8760b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8761b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8762b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8763b0a25468SMatt Arsenault %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8764b0a25468SMatt Arsenault ret i64 %result 8765b0a25468SMatt Arsenault} 8766b0a25468SMatt Arsenault 8767b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 8768b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory: 8769b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8770b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8771b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8772b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8773b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8774b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8775b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8776b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8777b0a25468SMatt Arsenault; 8778b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory: 8779b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8780b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8781b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 8782b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8783b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] 8784b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8785b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8786b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8787b0a25468SMatt Arsenault; 8788b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory: 8789b0a25468SMatt Arsenault; GFX9: 
; %bb.0: 8790b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8791b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] offset:32 8792b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8793b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8794b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8795b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8796b0a25468SMatt Arsenault %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 8797b0a25468SMatt Arsenault ret void 8798b0a25468SMatt Arsenault} 8799b0a25468SMatt Arsenault 8800b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 8801b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory: 8802b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8803b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8804b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8805b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8806b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8807b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8808b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8809b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8810b0a25468SMatt Arsenault; 8811b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory: 8812b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8813b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8814b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 8815b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8816b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc 8817b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
8818b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8819b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8820b0a25468SMatt Arsenault; 8821b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory: 8822b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8823b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8824b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 8825b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8826b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8827b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8828b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8829b0a25468SMatt Arsenault %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 8830b0a25468SMatt Arsenault ret i64 %result 8831b0a25468SMatt Arsenault} 8832b0a25468SMatt Arsenault 8833b0a25468SMatt Arsenault; --------------------------------------------------------------------- 8834b0a25468SMatt Arsenault; atomicrmw udec_wrap 8835b0a25468SMatt Arsenault; --------------------------------------------------------------------- 8836b0a25468SMatt Arsenault 8837b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret(ptr %ptr, i64 %in) { 8838b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret: 8839b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8840b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8841b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 8842b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8843b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8844b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8845b0a25468SMatt Arsenault; 8846b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret: 8847b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8848b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 8849b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 8850b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8851b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8852b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8853b0a25468SMatt Arsenault; 8854b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret: 8855b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8856b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8857b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 8858b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8859b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8860b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8861b0a25468SMatt Arsenault %tmp0 = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8862b0a25468SMatt Arsenault ret void 8863b0a25468SMatt Arsenault} 8864b0a25468SMatt Arsenault 8865b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret_offset(ptr %out, i64 %in) { 8866b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset: 8867b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8868b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8869b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8870b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8871b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 8872b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8873b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8874b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8875b0a25468SMatt Arsenault; 8876b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset: 8877b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8878b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8879b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, 
v0 8880b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8881b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 8882b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8883b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8884b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8885b0a25468SMatt Arsenault; 8886b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset: 8887b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8888b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8889b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] offset:32 8890b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8891b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8892b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8893b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8894b0a25468SMatt Arsenault %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8895b0a25468SMatt Arsenault ret void 8896b0a25468SMatt Arsenault} 8897b0a25468SMatt Arsenault 8898b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret(ptr %ptr, i64 %in) { 8899b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret: 8900b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8901b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8902b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 8903b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8904b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8905b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8906b0a25468SMatt Arsenault; 8907b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret: 8908b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8909b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8910b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 
8911b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8912b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8913b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8914b0a25468SMatt Arsenault; 8915b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret: 8916b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8917b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8918b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 8919b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8920b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8921b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8922b0a25468SMatt Arsenault %result = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8923b0a25468SMatt Arsenault ret i64 %result 8924b0a25468SMatt Arsenault} 8925b0a25468SMatt Arsenault 8926b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret_offset(ptr %out, i64 %in) { 8927b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset: 8928b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8929b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8930b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 8931b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8932b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 8933b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8934b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8935b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8936b0a25468SMatt Arsenault; 8937b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset: 8938b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8939b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8940b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 8941b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 
8942b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 8943b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8944b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8945b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8946b0a25468SMatt Arsenault; 8947b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset: 8948b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8949b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8950b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 8951b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8952b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8953b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8954b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 8955b0a25468SMatt Arsenault %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 8956b0a25468SMatt Arsenault ret i64 %result 8957b0a25468SMatt Arsenault} 8958b0a25468SMatt Arsenault 8959b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) { 8960b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_scalar: 8961b0a25468SMatt Arsenault; GFX7: ; %bb.0: 8962b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8963b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 8964b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 8965b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 8966b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 8967b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] 8968b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8969b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 8970b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 8971b0a25468SMatt Arsenault; 8972b0a25468SMatt Arsenault; GFX8-LABEL: 
flat_atomic_udec_wrap_i64_noret_scalar: 8973b0a25468SMatt Arsenault; GFX8: ; %bb.0: 8974b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8975b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 8976b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 8977b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 8978b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 8979b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] 8980b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8981b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 8982b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 8983b0a25468SMatt Arsenault; 8984b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_scalar: 8985b0a25468SMatt Arsenault; GFX9: ; %bb.0: 8986b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8987b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 8988b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 8989b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 8990b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 8991b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] 8992b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8993b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 8994b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 8995b0a25468SMatt Arsenault %tmp0 = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 8996b0a25468SMatt Arsenault ret void 8997b0a25468SMatt Arsenault} 8998b0a25468SMatt Arsenault 8999b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) { 9000b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar: 9001b0a25468SMatt Arsenault; GFX7: ; %bb.0: 9002b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9003b0a25468SMatt Arsenault; GFX7-NEXT: 
s_add_u32 s34, s4, 32 9004b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 9005b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 9006b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 9007b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 9008b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 9009b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] 9010b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9011b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 9012b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 9013b0a25468SMatt Arsenault; 9014b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar: 9015b0a25468SMatt Arsenault; GFX8: ; %bb.0: 9016b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9017b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 9018b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 9019b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 9020b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 9021b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 9022b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 9023b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] 9024b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9025b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 9026b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 9027b0a25468SMatt Arsenault; 9028b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar: 9029b0a25468SMatt Arsenault; GFX9: ; %bb.0: 9030b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9031b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 9032b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 9033b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 9034b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 9035b0a25468SMatt Arsenault; GFX9-NEXT: 
flat_atomic_dec_x2 v[2:3], v[0:1] offset:32 9036b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9037b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 9038b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 9039b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 9040b0a25468SMatt Arsenault %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 9041b0a25468SMatt Arsenault ret void 9042b0a25468SMatt Arsenault} 9043b0a25468SMatt Arsenault 9044b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) { 9045b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_scalar: 9046b0a25468SMatt Arsenault; GFX7: ; %bb.0: 9047b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9048b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 9049b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 9050b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s4 9051b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s5 9052b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc 9053b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9054b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 9055b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 9056b0a25468SMatt Arsenault; 9057b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_scalar: 9058b0a25468SMatt Arsenault; GFX8: ; %bb.0: 9059b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9060b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 9061b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 9062b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s4 9063b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s5 9064b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc 9065b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9066b0a25468SMatt 
Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 9067b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 9068b0a25468SMatt Arsenault; 9069b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_scalar: 9070b0a25468SMatt Arsenault; GFX9: ; %bb.0: 9071b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9072b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 9073b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 9074b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 9075b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 9076b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc 9077b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9078b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 9079b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 9080b0a25468SMatt Arsenault %result = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 9081b0a25468SMatt Arsenault ret i64 %result 9082b0a25468SMatt Arsenault} 9083b0a25468SMatt Arsenault 9084b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) { 9085b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar: 9086b0a25468SMatt Arsenault; GFX7: ; %bb.0: 9087b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9088b0a25468SMatt Arsenault; GFX7-NEXT: s_add_u32 s34, s4, 32 9089b0a25468SMatt Arsenault; GFX7-NEXT: s_addc_u32 s35, s5, 0 9090b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v2, s34 9091b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v0, s6 9092b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v1, s7 9093b0a25468SMatt Arsenault; GFX7-NEXT: v_mov_b32_e32 v3, s35 9094b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc 9095b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9096b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 
9097b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 9098b0a25468SMatt Arsenault; 9099b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar: 9100b0a25468SMatt Arsenault; GFX8: ; %bb.0: 9101b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9102b0a25468SMatt Arsenault; GFX8-NEXT: s_add_u32 s34, s4, 32 9103b0a25468SMatt Arsenault; GFX8-NEXT: s_addc_u32 s35, s5, 0 9104b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v2, s34 9105b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v0, s6 9106b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v1, s7 9107b0a25468SMatt Arsenault; GFX8-NEXT: v_mov_b32_e32 v3, s35 9108b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc 9109b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9110b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 9111b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 9112b0a25468SMatt Arsenault; 9113b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar: 9114b0a25468SMatt Arsenault; GFX9: ; %bb.0: 9115b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9116b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v0, s6 9117b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, s7 9118b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v2, s4 9119b0a25468SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v3, s5 9120b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] offset:32 glc 9121b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9122b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 9123b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 9124b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 9125b0a25468SMatt Arsenault %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 9126b0a25468SMatt Arsenault ret i64 %result 9127b0a25468SMatt Arsenault} 9128b0a25468SMatt Arsenault 
9129b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 9130b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory: 9131b0a25468SMatt Arsenault; GFX7: ; %bb.0: 9132b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9133b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 9134b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 9135b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 9136b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9137b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 9138b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 9139b0a25468SMatt Arsenault; 9140b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory: 9141b0a25468SMatt Arsenault; GFX8: ; %bb.0: 9142b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9143b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 9144b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 9145b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] 9146b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9147b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 9148b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 9149b0a25468SMatt Arsenault; 9150b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory: 9151b0a25468SMatt Arsenault; GFX9: ; %bb.0: 9152b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9153b0a25468SMatt Arsenault; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] offset:32 9154b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9155b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 9156b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 9157b0a25468SMatt Arsenault %gep = 
getelementptr i64, ptr %out, i64 4 9158b0a25468SMatt Arsenault %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 9159b0a25468SMatt Arsenault ret void 9160b0a25468SMatt Arsenault} 9161b0a25468SMatt Arsenault 9162b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) { 9163b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory: 9164b0a25468SMatt Arsenault; GFX7: ; %bb.0: 9165b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9166b0a25468SMatt Arsenault; GFX7-NEXT: v_add_i32_e32 v0, vcc, 32, v0 9167b0a25468SMatt Arsenault; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 9168b0a25468SMatt Arsenault; GFX7-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 9169b0a25468SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9170b0a25468SMatt Arsenault; GFX7-NEXT: buffer_wbinvl1_vol 9171b0a25468SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 9172b0a25468SMatt Arsenault; 9173b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory: 9174b0a25468SMatt Arsenault; GFX8: ; %bb.0: 9175b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9176b0a25468SMatt Arsenault; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 9177b0a25468SMatt Arsenault; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 9178b0a25468SMatt Arsenault; GFX8-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc 9179b0a25468SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9180b0a25468SMatt Arsenault; GFX8-NEXT: buffer_wbinvl1_vol 9181b0a25468SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 9182b0a25468SMatt Arsenault; 9183b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory: 9184b0a25468SMatt Arsenault; GFX9: ; %bb.0: 9185b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9186b0a25468SMatt Arsenault; 
GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] offset:32 glc 9187b0a25468SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9188b0a25468SMatt Arsenault; GFX9-NEXT: buffer_wbinvl1_vol 9189b0a25468SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 9190b0a25468SMatt Arsenault %gep = getelementptr i64, ptr %out, i64 4 9191b0a25468SMatt Arsenault %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 9192b0a25468SMatt Arsenault ret i64 %result 9193b0a25468SMatt Arsenault} 9194b0a25468SMatt Arsenault 9195b0a25468SMatt Arsenault!0 = !{} 9196b0a25468SMatt Arsenault!1 = !{i32 5, i32 6} 9197