xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir (revision b356aa3e2da7d1792412783e2c6247538ead75e8)
1# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
2# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
3# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
4# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9
5
# Stub LLVM IR module: one empty amdgpu_kernel definition for each machine
# function in this file. Only @smrd_imm takes an argument; its %const0 pointer
# is the IR value that the memory operands in smrd_imm refer to as %ir.const0.
6--- |
7  define amdgpu_kernel void @smrd_imm(ptr addrspace(4) %const0) { ret void }
8  define amdgpu_kernel void @smrd_wide() { ret void }
9  define amdgpu_kernel void @constant_address_positive() { ret void }
10  define amdgpu_kernel void @smrd_sgpr() { ret void }
11  define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
12...
13---
14
# Selection of scalar loads from a constant-address-space (p4) pointer plus a
# constant byte offset, followed by loads of p0/p1/p4 pointer values.
#
# Each commented section below corresponds, in order, to one
# G_CONSTANT / G_PTR_ADD / G_LOAD group in the body. The offsets are chosen to
# sit on either side of the points where the expected instruction form changes
# between the subtargets exercised by the RUN lines.
15name:            smrd_imm
16legalized:       true
17regBankSelected: true
18
19# GCN: body:
20# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
21
22# Immediate offset:
23# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
24# VI:   S_LOAD_DWORD_IMM [[PTR]], 4, 0
25
26# Max immediate offset for SI
27# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
28# VI:   S_LOAD_DWORD_IMM [[PTR]], 1020, 0
29
30# Immediate overflow for SI
31# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
32# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
33# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
34# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
35
36# Max immediate offset for VI
# On SI the materialized offset must also be consumed by the load, exactly as
# in the 1024 case above; check the load rather than only the S_MOV_B32.
37# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048572]], 0
38# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143, 0
39# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572, 0
40
41#
42# Immediate overflow for VI
43# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
44# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
45# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
46
47# Max immediate for CI
48# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869180
49# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
50# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
51# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
52# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
53# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
54# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
55# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
56# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
57# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
58
59# Immediate overflow for CI
60# GCN: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869184
61# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
62# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
63# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
64# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
65# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
66# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
67# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
68# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
69
70# Max 32-bit byte offset
71# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
72# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
73# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
74
75# Overflow 32-bit byte offset
76# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
77# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
78# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
79# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
80# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
81# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
82# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
83# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
84# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
85# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
86
87# Pointer loads
88# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
89# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
90# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
91# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
92# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
93# GCN: $sgpr0_sgpr1 = COPY [[AS4]]
94
95body: |
96  bb.0:
97    liveins: $sgpr0_sgpr1
98
99    %0:sgpr(p4) = COPY $sgpr0_sgpr1
100
101    %1:sgpr(s64) = G_CONSTANT i64 4
102    %2:sgpr(p4) = G_PTR_ADD %0, %1
103    %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4)
104    $sgpr0 = COPY %3
105
106    %4:sgpr(s64) = G_CONSTANT i64 1020
107    %5:sgpr(p4) = G_PTR_ADD %0, %4
108    %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4)
109    $sgpr0 = COPY %6
110
111    %7:sgpr(s64) = G_CONSTANT i64 1024
112    %8:sgpr(p4) = G_PTR_ADD %0, %7
113    %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4)
114    $sgpr0 = COPY %9
115
116    %10:sgpr(s64) = G_CONSTANT i64 1048572
117    %11:sgpr(p4) = G_PTR_ADD %0, %10
118    %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4)
119    $sgpr0 = COPY %12
120
121    %13:sgpr(s64) = G_CONSTANT i64 1048576
122    %14:sgpr(p4) = G_PTR_ADD %0, %13
123    %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4)
124    $sgpr0 = COPY %15
125
126    %16:sgpr(s64) = G_CONSTANT i64 17179869180
127    %17:sgpr(p4) = G_PTR_ADD %0, %16
128    %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4)
129    $sgpr0 = COPY %18
130
131    %19:sgpr(s64) = G_CONSTANT i64 17179869184
132    %20:sgpr(p4) = G_PTR_ADD %0, %19
133    %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4)
134    $sgpr0 = COPY %21
135
136    %22:sgpr(s64) = G_CONSTANT i64 4294967292
137    %23:sgpr(p4) = G_PTR_ADD %0, %22
138    %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4)
139    $sgpr0 = COPY %24
140
141    %25:sgpr(s64) = G_CONSTANT i64 4294967296
142    %26:sgpr(p4) = G_PTR_ADD %0, %25
143    %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4)
144    $sgpr0 = COPY %27
145
146    %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4)
147    $sgpr0_sgpr1 = COPY %28
148
149    %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4)
150    $sgpr0_sgpr1 = COPY %29
151
152    %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4)
153    $sgpr0_sgpr1 = COPY %30
154
155...
156---
157
# Loads <8 x s32> and <16 x s32> vectors into SGPR tuples, first through a
# constant-address-space (p4) pointer and then through a global (p1) pointer.
158name:            smrd_wide
159legalized:       true
160regBankSelected: true
161
162body: |
163  bb.0:
    ; The live-in list names the registers the block actually reads: both
    ; pointers are copied out of SGPR pairs.
164    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
165    %0:sgpr(p4) = COPY $sgpr0_sgpr1
166    %1:sgpr(p1) = COPY $sgpr2_sgpr3
167
    ; NOTE(review): none of the RUN lines in this file enables the CHECK
    ; prefix, so the directives below are not evaluated by FileCheck as
    ; written. They are kept, with consistent directive syntax, as a record
    ; of intent until they are ported to an enabled prefix.
168    ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
169    ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
170    ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]]
171    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4)
172    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2
173
174    ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]]
175    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4)
176    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3
177
178    ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]]
179    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1)
180    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4
181
182    ; CHECK: s_load_dwordx16 [[GLOBAL_PTR]]
183    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1)
184    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
185...
186
187
188# Test a load of an offset from a constant base address
# The base pointer is the literal 44 and the added byte offset is 64. The VI
# check expects the load immediate 64, while the SI/CI check expects 16, i.e.
# 64 / 4 (presumably a dword-scaled immediate on those targets; confirm against
# the ISA documentation). No GFX9-specific directive is present, so the gfx900
# run only verifies the label and the materialization of the base.
189# GCN-LABEL: name: constant_address_positive{{$}}
190# GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44
191
192# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
193# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
194
195---
196
197name:            constant_address_positive
198legalized:       true
199regBankSelected: true
200
201body: |
202  bb.0:
    ; NOTE(review): neither listed live-in register is read in this block;
    ; every value is derived from the two G_CONSTANTs.
203    liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
204    %0:sgpr(p4) = G_CONSTANT i64 44
205    %1:sgpr(s64) = G_CONSTANT i64 64
206    %2:sgpr(p4) = G_PTR_ADD %0, %1
    ; The loaded value is kept alive by the implicit use on S_ENDPGM.
207    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
208    S_ENDPGM 0, implicit %3
209...
210
211---
212
213# Test a load with a register offset.
# The offset is a 32-bit SGPR value that is zero-extended to 64 bits and added
# to the p4 base pointer. The SI/CI and VI directives expect the _SGPR load
# form taking the original 32-bit register; the GFX9 directive expects the
# _SGPR_IMM form, which carries one more trailing 0 operand.
214# GCN-LABEL: name: smrd_sgpr{{$}}
215# SICI: S_LOAD_DWORD_SGPR %0, %1, 0
216# VI: S_LOAD_DWORD_SGPR %0, %1, 0
217# GFX9: S_LOAD_DWORD_SGPR_IMM %0, %1, 0, 0
218
219name:            smrd_sgpr
220legalized:       true
221regBankSelected: true
222
223body: |
224  bb.0:
225    liveins: $sgpr0_sgpr1, $sgpr2
226    %0:sgpr(p4) = COPY $sgpr0_sgpr1
227    %1:sgpr(s32) = COPY $sgpr2
    ; The directives above reference %1 directly, i.e. the zero-extend is
    ; expected to be absorbed into the load's register-offset operand.
228    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
229    %4:sgpr(p4) = G_PTR_ADD %0, %2
230    %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
231    S_ENDPGM 0, implicit %5
232...
233
234---
235
236# Test a load with a (register + immediate) offset.
# The address is base + zext(32-bit SGPR) + 16, built from two chained
# G_PTR_ADDs. Only the GFX9 prefix has directives after the label; they expect
# a single S_LOAD_DWORD_SGPR_IMM that takes the copied base, the copied 32-bit
# offset register, and the immediate 16.
237# GCN-LABEL: name: smrd_sgpr_imm{{$}}
238# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
239# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
240# GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
241
242name:            smrd_sgpr_imm
243legalized:       true
244regBankSelected: true
245
246body: |
247  bb.0:
248    liveins: $sgpr0_sgpr1, $sgpr2
249    %0:sgpr(p4) = COPY $sgpr0_sgpr1
250    %1:sgpr(s32) = COPY $sgpr2
251    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
    ; First add applies the register offset, second add applies the constant.
252    %4:sgpr(p4) = G_PTR_ADD %0, %2
253    %5:sgpr(s64) = G_CONSTANT i64 16
254    %6:sgpr(p4) = G_PTR_ADD %4, %5
255    %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
256    S_ENDPGM 0, implicit %7
257...
258