# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9

# Instruction-selection tests for scalar loads, run once per subtarget above.
# Prefix key, taken from the run lines: GCN = every run; SI = tahiti only;
# CI = hawaii only; VI = fiji only; GFX9 = gfx900 only; SICI = tahiti and
# hawaii; SIVI = tahiti and fiji.

--- |
  define amdgpu_kernel void @smrd_imm(ptr addrspace(4) %const0) { ret void }
  define amdgpu_kernel void @smrd_wide() { ret void }
  define amdgpu_kernel void @constant_address_positive() { ret void }
  define amdgpu_kernel void @smrd_sgpr() { ret void }
  define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
...
---

name: smrd_imm
legalized: true
regBankSelected: true

# The body below adds a series of constant byte offsets to one base pointer and
# loads a dword from each result. The directive groups that follow are listed
# in the same order as those loads.

# GCN: body:
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0

# Max immediate offset for SI
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0

# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0

# Max immediate offset for VI
# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572

#
# Immediate overflow for VI
# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0

# Max immediate for CI
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869180
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0

# Immediate overflow for CI
# GCN: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869184
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0

# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0

# Overflow 32-bit byte offset
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0

# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS4]]

body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    %0:sgpr(p4) = COPY $sgpr0_sgpr1

    %1:sgpr(s64) = G_CONSTANT i64 4
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %3

    %4:sgpr(s64) = G_CONSTANT i64 1020
    %5:sgpr(p4) = G_PTR_ADD %0, %4
    %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %6

    %7:sgpr(s64) = G_CONSTANT i64 1024
    %8:sgpr(p4) = G_PTR_ADD %0, %7
    %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %9

    %10:sgpr(s64) = G_CONSTANT i64 1048572
    %11:sgpr(p4) = G_PTR_ADD %0, %10
    %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %12

    %13:sgpr(s64) = G_CONSTANT i64 1048576
    %14:sgpr(p4) = G_PTR_ADD %0, %13
    %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %15

    %16:sgpr(s64) = G_CONSTANT i64 17179869180
    %17:sgpr(p4) = G_PTR_ADD %0, %16
    %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %18

    %19:sgpr(s64) = G_CONSTANT i64 17179869184
    %20:sgpr(p4) = G_PTR_ADD %0, %19
    %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %21

    %22:sgpr(s64) = G_CONSTANT i64 4294967292
    %23:sgpr(p4) = G_PTR_ADD %0, %22
    %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %24

    %25:sgpr(s64) = G_CONSTANT i64 4294967296
    %26:sgpr(p4) = G_PTR_ADD %0, %25
    %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %27

    %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %28

    %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %29

    %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %30

...
---

# Test 8-dword and 16-dword loads through a constant (addrspace 4) pointer and
# a global (addrspace 1) pointer, all on the sgpr bank.
#
# NOTE(review): the directives in this function used to carry a prefix that no
# run line enables (and one lacked its colon), so FileCheck silently skipped
# them; they also spelled pre-selection types and assembly-style mnemonics.
# They are now live under the common prefix. The expected opcodes follow the
# S_LOAD_*_IMM naming checked in smrd_imm above -- confirm against real llc
# output before relying on them.
# GCN-LABEL: name: smrd_wide{{$}}

name: smrd_wide
legalized: true
regBankSelected: true

body: |
  bb.0:
    ; Both pointer pairs read below are scalar registers, so both are listed
    ; as live-in (the list previously named $vgpr2_vgpr3, which is never read).
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(p1) = COPY $sgpr2_sgpr3

    ; GCN: [[CONSTANT_PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
    ; GCN: [[GLOBAL_PTR:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
    ; GCN: S_LOAD_DWORDX8_IMM [[CONSTANT_PTR]], 0, 0
    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2

    ; GCN: S_LOAD_DWORDX16_IMM [[CONSTANT_PTR]], 0, 0
    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3

    ; GCN: S_LOAD_DWORDX8_IMM [[GLOBAL_PTR]], 0, 0
    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4

    ; GCN: S_LOAD_DWORDX16_IMM [[GLOBAL_PTR]], 0, 0
    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
...
# Test a load of an offset from a constant base address
# The base pointer is the constant 44 and the added byte offset is 64. The
# expected load immediate is 64 in the fiji run and 16 (64 / 4) in the tahiti
# and hawaii runs. The gfx900 run has no subtarget-specific directive for the
# load itself; only the lines with the common prefix apply to it.
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44

# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)

---

name: constant_address_positive
legalized: true
regBankSelected: true

body: |
  bb.0:
    ; NOTE(review): neither listed live-in register is read by this body.
    liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
    ; Constant base pointer (44) plus a constant byte offset (64).
    %0:sgpr(p4) = G_CONSTANT i64 44
    %1:sgpr(s64) = G_CONSTANT i64 64
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    ; Keep the loaded value alive by making it an implicit use of the terminator.
    S_ENDPGM 0, implicit %3
...

---

# Test a load with a register offset.
# The offset is a 32-bit sgpr value zero-extended to 64 bits before being added
# to the pointer. The tahiti, hawaii and fiji runs expect S_LOAD_DWORD_SGPR
# taking the original pointer (%0) and the 32-bit offset (%1); the gfx900 run
# expects S_LOAD_DWORD_SGPR_IMM with an additional 0 operand.
# GCN-LABEL: name: smrd_sgpr{{$}}
# SICI: S_LOAD_DWORD_SGPR %0, %1, 0
# VI: S_LOAD_DWORD_SGPR %0, %1, 0
# GFX9: S_LOAD_DWORD_SGPR_IMM %0, %1, 0, 0

name: smrd_sgpr
legalized: true
regBankSelected: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2
    ; %0 is the base pointer, %1 the 32-bit register offset.
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(s32) = COPY $sgpr2
    ; Widen the offset to 64 bits so it can feed G_PTR_ADD.
    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
    %4:sgpr(p4) = G_PTR_ADD %0, %2
    %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    S_ENDPGM 0, implicit %5
...

---

# Test a load with a (register + immediate) offset.
# The address is base + zext(32-bit sgpr) + 16. Only the gfx900 run has
# directives for the selected instructions here: it expects one
# S_LOAD_DWORD_SGPR_IMM taking the base copy, the offset copy and the
# immediate 16. The other runs only match the function label.
# GCN-LABEL: name: smrd_sgpr_imm{{$}}
# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
# GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,

name: smrd_sgpr_imm
legalized: true
regBankSelected: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2
    ; %0 is the base pointer, %1 the 32-bit register offset.
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(s32) = COPY $sgpr2
    ; First add the zero-extended register offset ...
    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
    %4:sgpr(p4) = G_PTR_ADD %0, %2
    ; ... then the constant 16 on top of that result.
    %5:sgpr(s64) = G_CONSTANT i64 16
    %6:sgpr(p4) = G_PTR_ADD %4, %5
    %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    S_ENDPGM 0, implicit %7
...