xref: /llvm-project/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll (revision aaf50bf34f3a2007221c644384d238666cfc2bc3)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
; RUN: llc -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12

; Base pointer plus a constant byte offset of 400. The address is formed in
; %entry but its only user is the volatile load in %loop. The checks expect the
; offset to appear as the scalar load's immediate on every tested CPU:
; 0x64 (= 400 / 4) on gfx600 and gfx700, 0x190 (= 400) on gfx801, gfx900 and
; gfx1200. No address arithmetic is expected ahead of the loop.
define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX67-LABEL: test_sink_smem_offset_400:
; GFX67:       ; %bb.0: ; %entry
; GFX67-NEXT:  .LBB0_1: ; %loop
; GFX67-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
; GFX67-NEXT:    s_load_dword s3, s[0:1], 0x64
; GFX67-NEXT:    s_add_i32 s2, s2, -1
; GFX67-NEXT:    s_cmp_lg_u32 s2, 0
; GFX67-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX67-NEXT:  ; %bb.2: ; %end
; GFX67-NEXT:    s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_400:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:  .LBB0_1: ; %loop
; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    s_load_dword s3, s[0:1], 0x190
; GFX89-NEXT:    s_add_i32 s2, s2, -1
; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
; GFX89-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX89-NEXT:  ; %bb.2: ; %end
; GFX89-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_400:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB0_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x190
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Base pointer plus a constant byte offset of 4000, used only by the volatile
; load in %loop. Expected code per the checks:
;   - gfx600: the offset is added to the base before the loop (s_add_u32 /
;     s_addc_u32 with 0xfa0) and the load uses immediate 0x0.
;   - gfx700: the load carries immediate 0x3e8 (= 4000 / 4).
;   - gfx801, gfx900, gfx1200: the load carries immediate 0xfa0 (= 4000).
define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6-LABEL: test_sink_smem_offset_4000:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_add_u32 s0, s0, 0xfa0
; GFX6-NEXT:    s_addc_u32 s1, s1, 0
; GFX6-NEXT:  .LBB1_1: ; %loop
; GFX6-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6-NEXT:    s_add_i32 s2, s2, -1
; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX6-NEXT:  ; %bb.2: ; %end
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB1_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x3e8
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_4000:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:  .LBB1_1: ; %loop
; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    s_load_dword s3, s[0:1], 0xfa0
; GFX89-NEXT:    s_add_i32 s2, s2, -1
; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
; GFX89-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX89-NEXT:  ; %bb.2: ; %end
; GFX89-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB1_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0xfa0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Base pointer plus a constant byte offset of 4000000, used only by the
; volatile load in %loop. Expected code per the checks:
;   - gfx600, gfx801, gfx900: the offset is added to the base before the loop
;     (s_add_u32 / s_addc_u32 with 0x3d0900) and the load uses immediate 0x0.
;   - gfx700: the load carries immediate 0xf4240 (= 4000000 / 4).
;   - gfx1200: the load carries immediate 0x3d0900 (= 4000000).
define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_4000000:
; GFX689:       ; %bb.0: ; %entry
; GFX689-NEXT:    s_add_u32 s0, s0, 0x3d0900
; GFX689-NEXT:    s_addc_u32 s1, s1, 0
; GFX689-NEXT:  .LBB2_1: ; %loop
; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT:    s_add_i32 s2, s2, -1
; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
; GFX689-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX689-NEXT:  ; %bb.2: ; %end
; GFX689-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB2_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0xf4240
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB2_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x3d0900
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Base pointer plus a constant byte offset of 40000000, used only by the
; volatile load in %loop. Expected code per the checks:
;   - gfx600, gfx801, gfx900: the offset is added to the base before the loop
;     (s_add_u32 / s_addc_u32 with 0x2625a00) and the load uses immediate 0x0.
;   - gfx700: the load carries immediate 0x989680 (= 40000000 / 4).
;   - gfx1200: the offset is added before the loop with a single s_add_nc_u64
;     and the load uses immediate 0x0.
define amdgpu_cs void @test_sink_smem_offset_40000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_40000000:
; GFX689:       ; %bb.0: ; %entry
; GFX689-NEXT:    s_add_u32 s0, s0, 0x2625a00
; GFX689-NEXT:    s_addc_u32 s1, s1, 0
; GFX689-NEXT:  .LBB3_1: ; %loop
; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT:    s_add_i32 s2, s2, -1
; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
; GFX689-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX689-NEXT:  ; %bb.2: ; %end
; GFX689-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_40000000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB3_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x989680
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x2625a00
; GFX12-NEXT:  .LBB3_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Base pointer plus a constant byte offset of 40000000000, which does not fit
; in 32 bits (low half 0x502f9000, high half 9). On every tested CPU the checks
; expect the full 64-bit add to be done once before the loop and the load to
; use immediate 0x0: s_add_u32 / s_addc_u32 on gfx600 through gfx900, and a
; materialized 64-bit constant fed to s_add_nc_u64 on gfx1200.
define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_40000000000:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_u32 s0, s0, 0x502f9000
; GFX6789-NEXT:    s_addc_u32 s1, s1, 9
; GFX6789-NEXT:  .LBB4_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT:    s_add_i32 s2, s2, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB4_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_mov_b32 s4, 0x502f9000
; GFX12-NEXT:    s_mov_b32 s5, 9
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT:  .LBB4_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB4_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Base pointer plus a negative constant byte offset of -400. On every tested
; CPU the checks expect the offset to be applied with a 64-bit add before the
; loop (low half 0xfffffe70, high half -1) and the load to use immediate 0x0;
; none of the expected outputs places the negative offset in the load itself.
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_u32 s0, s0, 0xfffffe70
; GFX6789-NEXT:    s_addc_u32 s1, s1, -1
; GFX6789-NEXT:  .LBB5_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT:    s_add_i32 s2, s2, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB5_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_movk_i32 s4, 0xfe70
; GFX12-NEXT:    s_mov_b32 s5, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT:  .LBB5_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB5_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Same for address space 6, constant 32-bit.
; The pointer occupies a single SGPR here. The checks expect the -400 offset to
; be applied with one 32-bit add before the loop (0xfffffe70), the upper half
; of the 64-bit load address to be set to 0, and the load to use immediate 0x0
; on every tested CPU.
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_i32 s2, s0, 0xfffffe70
; GFX6789-NEXT:    s_mov_b32 s3, 0
; GFX6789-NEXT:  .LBB6_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX6789-NEXT:    s_add_i32 s1, s1, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s1, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB6_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_add_co_i32 s2, s0, 0xfffffe70
; GFX12-NEXT:    s_mov_b32 s3, 0
; GFX12-NEXT:  .LBB6_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX12-NEXT:    s_add_co_i32 s1, s1, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB6_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  ; Address computed outside the loop; its only user is the load in %loop.
  %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
  br label %loop

loop:
  ; Count down from %val; the loop exits once the decremented value reaches 0.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; Volatile load whose result is otherwise unused.
  %load = load volatile i32, ptr addrspace(6) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX678: {{.*}}
; GFX9: {{.*}}
375