# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX12 %s

# Instruction selection of seq_cst atomic loads through flat (addrspace 0)
# pointers, checked on five AMDGPU subtargets: hawaii, gfx900, gfx1010,
# gfx1100 and gfx1200. Only the instruction-select pass is run, so every input
# function is already marked legalized and regBankSelected.
#
# The check lines are generated. Regenerate them with
# utils/update_mir_test_checks.py instead of editing them by hand.

# 32-bit scalar load: every subtarget is expected to select FLAT_LOAD_DWORD
# with a zero immediate offset.
---

name: load_atomic_flat_s32_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0)
    $vgpr0 = COPY %1

...

# <2 x s16> load: the expected output still contains the generic G_LOAD on
# every subtarget, i.e. the atomic load of this type is left unselected. The
# run lines pass -global-isel-abort=0, presumably so that llc keeps going
# instead of aborting on the selection failure.
---

name: load_atomic_flat_v2s16_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_v2s16_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
    ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
    ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_v2s16_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
    ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0)
    $vgpr0 = COPY %1

...

# 32-bit pointer (p3) load: the expected output still contains the generic
# G_LOAD on every subtarget, i.e. the atomic load of this type is left
# unselected. The run lines pass -global-isel-abort=0, presumably so that llc
# keeps going instead of aborting on the selection failure.
---

name: load_atomic_flat_p3_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_p3_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
    ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
    ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_p3_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
    ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0)
    $vgpr0 = COPY %1

...

# 64-bit scalar load: every subtarget is expected to select FLAT_LOAD_DWORDX2
# with a zero immediate offset.
---

name: load_atomic_flat_s64_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    ;
    ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    ;
    ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    ;
    ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    ;
    ; GFX12-LABEL: name: load_atomic_flat_s64_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0)
    $vgpr0_vgpr1 = COPY %1

...

# <2 x s32> load: the expected output still contains the generic G_LOAD on
# every subtarget, i.e. the atomic load of this type is left unselected.
---

name: load_atomic_flat_v2s32_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0)
    $vgpr0_vgpr1 = COPY %1

...

# <4 x s16> load: the expected output still contains the generic G_LOAD on
# every subtarget, i.e. the atomic load of this type is left unselected.
---

name: load_atomic_flat_v4s16_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_v4s16_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_v4s16_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0)
    $vgpr0_vgpr1 = COPY %1

...

# 64-bit pointer (p1) load: the expected output still contains the generic
# G_LOAD on every subtarget, i.e. the atomic load of this type is left
# unselected.
---

name: load_atomic_flat_p1_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_p1_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_p1_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0)
    $vgpr0_vgpr1 = COPY %1

...

# 64-bit pointer (p0) load: the expected output still contains the generic
# G_LOAD on every subtarget, i.e. the atomic load of this type is left
# unselected.
---

name: load_atomic_flat_p0_seq_cst
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_p0_seq_cst
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
    ;
    ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
    ;
    ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
    ;
    ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
    ;
    ; GFX12-LABEL: name: load_atomic_flat_p0_seq_cst
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
    ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0)
    $vgpr0_vgpr1 = COPY %1

...

# 32-bit load from (pointer + -2048). Only the gfx1200 output folds the
# constant into the FLAT_LOAD_DWORD offset operand. The other four subtargets
# materialize it with V_MOV_B64_PSEUDO and build the address with a
# V_ADD_CO_U32 / V_ADDC_U32 pair before loading with offset 0.
---

name: load_atomic_flat_s32_seq_cst_gep_m2048
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(s64) = G_CONSTANT i64 -2048
    %2:vgpr(p0) = G_PTR_ADD %0, %1
    %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
    $vgpr0 = COPY %3

...

# 32-bit load from (pointer + 4095). The gfx900, gfx1100 and gfx1200 outputs
# fold the constant into the FLAT_LOAD_DWORD offset operand. hawaii and
# gfx1010 materialize it with V_MOV_B64_PSEUDO and build the address with a
# V_ADD_CO_U32 / V_ADDC_U32 pair before loading with offset 0.
---

name: load_atomic_flat_s32_seq_cst_gep_4095
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
    ; GFX11: liveins: $vgpr0_vgpr1
    ; GFX11-NEXT: {{ $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    ;
    ; GFX12-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
    ; GFX12: liveins: $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
    ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
    %0:vgpr(p0) = COPY $vgpr0_vgpr1
    %1:vgpr(s64) = G_CONSTANT i64 4095
    %2:vgpr(p0) = G_PTR_ADD %0, %1
    %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
    $vgpr0 = COPY %3

...