1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s 3# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s 4# RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s 5# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s 6# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s 7# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s 8# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s 9 10--- 11 12name: load_atomic_global_s32_seq_cst 13legalized: true 14regBankSelected: true 15tracksRegLiveness: true 16 17body: | 18 bb.0: 19 liveins: $vgpr0_vgpr1 20 21 ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst 22 ; GFX6: liveins: $vgpr0_vgpr1 23 ; GFX6-NEXT: {{ $}} 24 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 25 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 26 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 27 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 28 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 29 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 30 ; 
GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 31 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 32 ; 33 ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst 34 ; GFX7: liveins: $vgpr0_vgpr1 35 ; GFX7-NEXT: {{ $}} 36 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 37 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 38 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 39 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 40 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 41 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 42 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 43 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 44 ; 45 ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst 46 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 47 ; GFX7-FLAT-NEXT: {{ $}} 48 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 49 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) 50 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] 51 ; 52 ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst 53 ; GFX9: liveins: $vgpr0_vgpr1 54 ; GFX9-NEXT: {{ $}} 55 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 56 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 57 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 58 ; 59 ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst 60 ; GFX10: liveins: $vgpr0_vgpr1 61 ; GFX10-NEXT: 
{{ $}} 62 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 63 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 64 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 65 %0:vgpr(p1) = COPY $vgpr0_vgpr1 66 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 1) 67 $vgpr0 = COPY %1 68 69... 70 71--- 72 73name: load_atomic_global_v2s16_seq_cst 74legalized: true 75regBankSelected: true 76tracksRegLiveness: true 77 78body: | 79 bb.0: 80 liveins: $vgpr0_vgpr1 81 82 ; GFX6-LABEL: name: load_atomic_global_v2s16_seq_cst 83 ; GFX6: liveins: $vgpr0_vgpr1 84 ; GFX6-NEXT: {{ $}} 85 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 86 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) 87 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) 88 ; 89 ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst 90 ; GFX7: liveins: $vgpr0_vgpr1 91 ; GFX7-NEXT: {{ $}} 92 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 93 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) 94 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) 95 ; 96 ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst 97 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 98 ; GFX7-FLAT-NEXT: {{ $}} 99 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 100 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) 101 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) 102 ; 103 ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst 104 ; GFX9: liveins: $vgpr0_vgpr1 105 ; GFX9-NEXT: {{ $}} 106 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 107 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) 108 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) 109 ; 
110 ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst 111 ; GFX10: liveins: $vgpr0_vgpr1 112 ; GFX10-NEXT: {{ $}} 113 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 114 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) 115 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) 116 %0:vgpr(p1) = COPY $vgpr0_vgpr1 117 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 1) 118 $vgpr0 = COPY %1 119 120... 121 122--- 123 124name: load_atomic_global_p3_seq_cst 125legalized: true 126regBankSelected: true 127tracksRegLiveness: true 128 129body: | 130 bb.0: 131 liveins: $vgpr0_vgpr1 132 133 ; GFX6-LABEL: name: load_atomic_global_p3_seq_cst 134 ; GFX6: liveins: $vgpr0_vgpr1 135 ; GFX6-NEXT: {{ $}} 136 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 137 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) 138 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) 139 ; 140 ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst 141 ; GFX7: liveins: $vgpr0_vgpr1 142 ; GFX7-NEXT: {{ $}} 143 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 144 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) 145 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) 146 ; 147 ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst 148 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 149 ; GFX7-FLAT-NEXT: {{ $}} 150 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 151 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) 152 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](p3) 153 ; 154 ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst 155 ; GFX9: liveins: $vgpr0_vgpr1 156 ; GFX9-NEXT: {{ $}} 157 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 158 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) 159 ; 
GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) 160 ; 161 ; GFX10-LABEL: name: load_atomic_global_p3_seq_cst 162 ; GFX10: liveins: $vgpr0_vgpr1 163 ; GFX10-NEXT: {{ $}} 164 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 165 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) 166 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) 167 %0:vgpr(p1) = COPY $vgpr0_vgpr1 168 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 1) 169 $vgpr0 = COPY %1 170 171... 172 173--- 174 175name: load_atomic_global_s64_seq_cst 176legalized: true 177regBankSelected: true 178tracksRegLiveness: true 179 180body: | 181 bb.0: 182 liveins: $vgpr0_vgpr1 183 184 ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst 185 ; GFX6: liveins: $vgpr0_vgpr1 186 ; GFX6-NEXT: {{ $}} 187 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 188 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 189 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 190 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 191 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 192 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 193 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) 194 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] 195 ; 196 ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst 197 ; GFX7: liveins: $vgpr0_vgpr1 198 ; GFX7-NEXT: {{ $}} 199 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 200 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 201 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 202 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, 
[[S_MOV_B32_1]], %subreg.sub1 203 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 204 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 205 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) 206 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] 207 ; 208 ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst 209 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 210 ; GFX7-FLAT-NEXT: {{ $}} 211 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 212 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) 213 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] 214 ; 215 ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst 216 ; GFX9: liveins: $vgpr0_vgpr1 217 ; GFX9-NEXT: {{ $}} 218 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 219 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) 220 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] 221 ; 222 ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst 223 ; GFX10: liveins: $vgpr0_vgpr1 224 ; GFX10-NEXT: {{ $}} 225 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 226 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) 227 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] 228 %0:vgpr(p1) = COPY $vgpr0_vgpr1 229 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 1) 230 $vgpr0_vgpr1 = COPY %1 231 232... 
233 234--- 235 236name: load_atomic_global_v2s32_seq_cst 237legalized: true 238regBankSelected: true 239tracksRegLiveness: true 240 241body: | 242 bb.0: 243 liveins: $vgpr0_vgpr1 244 245 ; GFX6-LABEL: name: load_atomic_global_v2s32_seq_cst 246 ; GFX6: liveins: $vgpr0_vgpr1 247 ; GFX6-NEXT: {{ $}} 248 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 249 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) 250 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) 251 ; 252 ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst 253 ; GFX7: liveins: $vgpr0_vgpr1 254 ; GFX7-NEXT: {{ $}} 255 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 256 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) 257 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) 258 ; 259 ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst 260 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 261 ; GFX7-FLAT-NEXT: {{ $}} 262 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 263 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) 264 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) 265 ; 266 ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst 267 ; GFX9: liveins: $vgpr0_vgpr1 268 ; GFX9-NEXT: {{ $}} 269 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 270 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) 271 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) 272 ; 273 ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst 274 ; GFX10: liveins: $vgpr0_vgpr1 275 ; GFX10-NEXT: {{ $}} 276 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 277 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) 278 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 
x s32>) 279 %0:vgpr(p1) = COPY $vgpr0_vgpr1 280 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) 281 $vgpr0_vgpr1 = COPY %1 282 283... 284 285--- 286 287name: load_atomic_global_v4s16_seq_cst 288legalized: true 289regBankSelected: true 290tracksRegLiveness: true 291 292body: | 293 bb.0: 294 liveins: $vgpr0_vgpr1 295 296 ; GFX6-LABEL: name: load_atomic_global_v4s16_seq_cst 297 ; GFX6: liveins: $vgpr0_vgpr1 298 ; GFX6-NEXT: {{ $}} 299 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 300 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) 301 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) 302 ; 303 ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst 304 ; GFX7: liveins: $vgpr0_vgpr1 305 ; GFX7-NEXT: {{ $}} 306 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 307 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) 308 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) 309 ; 310 ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst 311 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 312 ; GFX7-FLAT-NEXT: {{ $}} 313 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 314 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) 315 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) 316 ; 317 ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst 318 ; GFX9: liveins: $vgpr0_vgpr1 319 ; GFX9-NEXT: {{ $}} 320 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 321 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) 322 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) 323 ; 324 ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst 325 ; GFX10: liveins: $vgpr0_vgpr1 326 ; GFX10-NEXT: {{ $}} 327 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 
328 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) 329 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) 330 %0:vgpr(p1) = COPY $vgpr0_vgpr1 331 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 1) 332 $vgpr0_vgpr1 = COPY %1 333 334... 335 336--- 337 338name: load_atomic_global_p1_seq_cst 339legalized: true 340regBankSelected: true 341tracksRegLiveness: true 342 343body: | 344 bb.0: 345 liveins: $vgpr0_vgpr1 346 347 ; GFX6-LABEL: name: load_atomic_global_p1_seq_cst 348 ; GFX6: liveins: $vgpr0_vgpr1 349 ; GFX6-NEXT: {{ $}} 350 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 351 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) 352 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) 353 ; 354 ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst 355 ; GFX7: liveins: $vgpr0_vgpr1 356 ; GFX7-NEXT: {{ $}} 357 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 358 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) 359 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) 360 ; 361 ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst 362 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 363 ; GFX7-FLAT-NEXT: {{ $}} 364 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 365 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) 366 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) 367 ; 368 ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst 369 ; GFX9: liveins: $vgpr0_vgpr1 370 ; GFX9-NEXT: {{ $}} 371 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 372 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) 373 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) 374 ; 375 ; GFX10-LABEL: name: load_atomic_global_p1_seq_cst 376 ; GFX10: liveins: $vgpr0_vgpr1 377 ; GFX10-NEXT: {{ 
$}} 378 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 379 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) 380 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) 381 %0:vgpr(p1) = COPY $vgpr0_vgpr1 382 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 1) 383 $vgpr0_vgpr1 = COPY %1 384 385... 386 387--- 388 389name: load_atomic_global_p0_seq_cst 390legalized: true 391regBankSelected: true 392tracksRegLiveness: true 393 394body: | 395 bb.0: 396 liveins: $vgpr0_vgpr1 397 398 ; GFX6-LABEL: name: load_atomic_global_p0_seq_cst 399 ; GFX6: liveins: $vgpr0_vgpr1 400 ; GFX6-NEXT: {{ $}} 401 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 402 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) 403 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) 404 ; 405 ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst 406 ; GFX7: liveins: $vgpr0_vgpr1 407 ; GFX7-NEXT: {{ $}} 408 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 409 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) 410 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) 411 ; 412 ; GFX7-FLAT-LABEL: name: load_atomic_global_p0_seq_cst 413 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 414 ; GFX7-FLAT-NEXT: {{ $}} 415 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 416 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) 417 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) 418 ; 419 ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst 420 ; GFX9: liveins: $vgpr0_vgpr1 421 ; GFX9-NEXT: {{ $}} 422 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 423 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) 424 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) 425 ; 426 ; GFX10-LABEL: name: load_atomic_global_p0_seq_cst 427 ; GFX10: liveins: 
$vgpr0_vgpr1 428 ; GFX10-NEXT: {{ $}} 429 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 430 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) 431 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) 432 %0:vgpr(p1) = COPY $vgpr0_vgpr1 433 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 1) 434 $vgpr0_vgpr1 = COPY %1 435 436... 437 438--- 439 440name: load_atomic_global_s32_seq_cst_gep_m2048 441legalized: true 442regBankSelected: true 443tracksRegLiveness: true 444 445body: | 446 bb.0: 447 liveins: $vgpr0_vgpr1 448 449 ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 450 ; GFX6: liveins: $vgpr0_vgpr1 451 ; GFX6-NEXT: {{ $}} 452 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 453 ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec 454 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 455 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 456 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 457 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 458 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 459 ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 460 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 461 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 462 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 463 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 464 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 465 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 466 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 467 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 468 ; 469 ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 470 ; GFX7: liveins: $vgpr0_vgpr1 471 ; GFX7-NEXT: {{ $}} 472 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 473 ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec 474 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 475 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 476 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 477 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 478 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 479 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 480 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 481 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 482 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 483 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 484 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 485 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 486 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 487 
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 488 ; 489 ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 490 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 491 ; GFX7-FLAT-NEXT: {{ $}} 492 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 493 ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec 494 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 495 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 496 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 497 ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 498 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 499 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 500 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 501 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) 502 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] 503 ; 504 ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 505 ; GFX9: liveins: $vgpr0_vgpr1 506 ; GFX9-NEXT: {{ $}} 507 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 508 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 509 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 510 ; 511 ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 512 ; GFX10: liveins: $vgpr0_vgpr1 513 ; GFX10-NEXT: {{ $}} 514 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 515 ; GFX10-NEXT: 
[[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 516 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 517 %0:vgpr(p1) = COPY $vgpr0_vgpr1 518 %1:vgpr(s64) = G_CONSTANT i64 -2048 519 %2:vgpr(p1) = G_PTR_ADD %0, %1 520 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) 521 $vgpr0 = COPY %3 522 523... 524 525--- 526 527name: load_atomic_global_s32_seq_cst_gep_4095 528legalized: true 529regBankSelected: true 530tracksRegLiveness: true 531 532body: | 533 bb.0: 534 liveins: $vgpr0_vgpr1 535 536 ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 537 ; GFX6: liveins: $vgpr0_vgpr1 538 ; GFX6-NEXT: {{ $}} 539 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 540 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 541 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 542 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 543 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 544 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 545 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 546 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 547 ; 548 ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 549 ; GFX7: liveins: $vgpr0_vgpr1 550 ; GFX7-NEXT: {{ $}} 551 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 552 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 553 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 554 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 555 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 556 ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 557 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 558 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] 559 ; 560 ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 561 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 562 ; GFX7-FLAT-NEXT: {{ $}} 563 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 564 ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec 565 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 566 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 567 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 568 ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 569 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 570 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 571 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 572 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) 573 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] 574 ; 575 ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 576 ; GFX9: liveins: $vgpr0_vgpr1 577 ; GFX9-NEXT: {{ $}} 578 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 579 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst 
(s32), addrspace 1) 580 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 581 ; 582 ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 583 ; GFX10: liveins: $vgpr0_vgpr1 584 ; GFX10-NEXT: {{ $}} 585 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 586 ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec 587 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 588 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 589 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 590 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 591 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 592 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 593 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 594 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) 595 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] 596 %0:vgpr(p1) = COPY $vgpr0_vgpr1 597 %1:vgpr(s64) = G_CONSTANT i64 4095 598 %2:vgpr(p1) = G_PTR_ADD %0, %1 599 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) 600 $vgpr0 = COPY %3 601 602... 
---

# Instruction selection of a seq_cst atomic 64-bit load from global memory
# (addrspace 1) whose address is the pointer in $vgpr0_vgpr1 plus the constant
# -2048 (G_PTR_ADD of a G_CONSTANT i64 -2048).
#
# What the per-prefix checks below pin down:
#   * GFX6 / GFX7: the constant is materialized with V_MOV_B64_PSEUDO and added
#     to the pointer with a V_ADD_CO_U32_e64 / V_ADDC_U32_e64 pair; the sum is
#     the address operand of BUFFER_LOAD_DWORDX2_ADDR64, whose immediate
#     operands are all 0.
#   * GFX7-FLAT: same explicit 64-bit add, followed by FLAT_LOAD_DWORDX2 with
#     immediate operands 0.
#   * GFX9 / GFX10: no separate add is emitted; -2048 appears directly as an
#     immediate operand of GLOBAL_LOAD_DWORDX2 on the original pointer.
#     NOTE(review): presumably because -2048 fits the immediate offset field
#     of global loads on these targets -- confirm against the selector.
#
# The "; <PREFIX>..." lines are autogenerated (see the NOTE at the top of this
# file); regenerate them with utils/update_mir_test_checks.py instead of
# editing them by hand.
name: load_atomic_global_s64_seq_cst_gep_m2048
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
    ; GFX6: liveins: $vgpr0_vgpr1
    ; GFX6-NEXT: {{ $}}
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
    ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
    ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
    ;
    ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
    ; GFX7: liveins: $vgpr0_vgpr1
    ; GFX7-NEXT: {{ $}}
    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
    ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
    ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
    ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
    ;
    ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
    ; GFX7-FLAT-NEXT: {{ $}}
    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1)
    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
    ;
    ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
    ;
    ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
    ; GFX10: liveins: $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
    %0:vgpr(p1) = COPY $vgpr0_vgpr1
    %1:vgpr(s64) = G_CONSTANT i64 -2048
    %2:vgpr(p1) = G_PTR_ADD %0, %1
    %3:vgpr(s64) = G_LOAD %2 :: (load seq_cst (s64), align 8, addrspace 1)
    $vgpr0_vgpr1 = COPY %3

...