xref: /llvm-project/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll (revision aaf50bf34f3a2007221c644384d238666cfc2bc3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
3; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
4; RUN: llc -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
7
; Byte offset 400 from an inreg addrspace(4) pointer. The address is computed
; once in %entry and consumed by a volatile load inside the loop.
; Per the assertions below, every run line emits the load with an immediate
; offset and no separate address add before the loop: 0x64 (= 400/4) for
; gfx600/gfx700 and 0x190 (= 400) for gfx801/gfx900/gfx1200.
; NOTE(review): the 0x64 form looks like a dword-scaled immediate - confirm
; against the ISA documentation.
8define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
9; GFX67-LABEL: test_sink_smem_offset_400:
10; GFX67:       ; %bb.0: ; %entry
11; GFX67-NEXT:  .LBB0_1: ; %loop
12; GFX67-NEXT:    ; =>This Inner Loop Header: Depth=1
13; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
14; GFX67-NEXT:    s_load_dword s3, s[0:1], 0x64
15; GFX67-NEXT:    s_add_i32 s2, s2, -1
16; GFX67-NEXT:    s_cmp_lg_u32 s2, 0
17; GFX67-NEXT:    s_cbranch_scc1 .LBB0_1
18; GFX67-NEXT:  ; %bb.2: ; %end
19; GFX67-NEXT:    s_endpgm
20;
21; GFX89-LABEL: test_sink_smem_offset_400:
22; GFX89:       ; %bb.0: ; %entry
23; GFX89-NEXT:  .LBB0_1: ; %loop
24; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
25; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
26; GFX89-NEXT:    s_load_dword s3, s[0:1], 0x190
27; GFX89-NEXT:    s_add_i32 s2, s2, -1
28; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
29; GFX89-NEXT:    s_cbranch_scc1 .LBB0_1
30; GFX89-NEXT:  ; %bb.2: ; %end
31; GFX89-NEXT:    s_endpgm
32;
33; GFX12-LABEL: test_sink_smem_offset_400:
34; GFX12:       ; %bb.0: ; %entry
35; GFX12-NEXT:  .LBB0_1: ; %loop
36; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
37; GFX12-NEXT:    s_wait_kmcnt 0x0
38; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x190
39; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
40; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
41; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
42; GFX12-NEXT:    s_cbranch_scc1 .LBB0_1
43; GFX12-NEXT:  ; %bb.2: ; %end
44; GFX12-NEXT:    s_endpgm
45entry:
  ; The address is formed here, outside the loop that uses it.
46  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
47  br label %loop
48
49loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
50  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
51  %dec = sub i32 %count, 1
52  %load = load volatile i32, ptr addrspace(4) %gep
53  %cond = icmp eq i32 %dec, 0
54  br i1 %cond, label %end, label %loop
55
56end:
57  ret void
58}
59
; Byte offset 4000 from an inreg addrspace(4) pointer, used by a volatile load
; inside the loop. The assertions below show three different outcomes:
;   - gfx600 adds 0xfa0 (= 4000) to s[0:1] ahead of the loop and loads with a
;     zero immediate;
;   - gfx700 keeps the offset on the load as 0x3e8 (= 4000/4);
;   - gfx801/gfx900/gfx1200 keep it on the load as 0xfa0.
; NOTE(review): presumably 4000 no longer fits the gfx600 immediate field while
; it still fits on the later targets - confirm against the ISA documentation.
60define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
61; GFX6-LABEL: test_sink_smem_offset_4000:
62; GFX6:       ; %bb.0: ; %entry
63; GFX6-NEXT:    s_add_u32 s0, s0, 0xfa0
64; GFX6-NEXT:    s_addc_u32 s1, s1, 0
65; GFX6-NEXT:  .LBB1_1: ; %loop
66; GFX6-NEXT:    ; =>This Inner Loop Header: Depth=1
67; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x0
69; GFX6-NEXT:    s_add_i32 s2, s2, -1
70; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
71; GFX6-NEXT:    s_cbranch_scc1 .LBB1_1
72; GFX6-NEXT:  ; %bb.2: ; %end
73; GFX6-NEXT:    s_endpgm
74;
75; GFX7-LABEL: test_sink_smem_offset_4000:
76; GFX7:       ; %bb.0: ; %entry
77; GFX7-NEXT:  .LBB1_1: ; %loop
78; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
79; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
80; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x3e8
81; GFX7-NEXT:    s_add_i32 s2, s2, -1
82; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
83; GFX7-NEXT:    s_cbranch_scc1 .LBB1_1
84; GFX7-NEXT:  ; %bb.2: ; %end
85; GFX7-NEXT:    s_endpgm
86;
87; GFX89-LABEL: test_sink_smem_offset_4000:
88; GFX89:       ; %bb.0: ; %entry
89; GFX89-NEXT:  .LBB1_1: ; %loop
90; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
91; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
92; GFX89-NEXT:    s_load_dword s3, s[0:1], 0xfa0
93; GFX89-NEXT:    s_add_i32 s2, s2, -1
94; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
95; GFX89-NEXT:    s_cbranch_scc1 .LBB1_1
96; GFX89-NEXT:  ; %bb.2: ; %end
97; GFX89-NEXT:    s_endpgm
98;
99; GFX12-LABEL: test_sink_smem_offset_4000:
100; GFX12:       ; %bb.0: ; %entry
101; GFX12-NEXT:  .LBB1_1: ; %loop
102; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
103; GFX12-NEXT:    s_wait_kmcnt 0x0
104; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0xfa0
105; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
106; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
107; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
108; GFX12-NEXT:    s_cbranch_scc1 .LBB1_1
109; GFX12-NEXT:  ; %bb.2: ; %end
110; GFX12-NEXT:    s_endpgm
111entry:
  ; The address is formed here, outside the loop that uses it.
112  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
113  br label %loop
114
115loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
116  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
117  %dec = sub i32 %count, 1
118  %load = load volatile i32, ptr addrspace(4) %gep
119  %cond = icmp eq i32 %dec, 0
120  br i1 %cond, label %end, label %loop
121
122end:
123  ret void
124}
125
; Byte offset 4000000 from an inreg addrspace(4) pointer, used by a volatile
; load inside the loop. According to the assertions below:
;   - gfx600, gfx801 and gfx900 (one shared check prefix) add 0x3d0900
;     (= 4000000) to s[0:1] ahead of the loop and load with a zero immediate;
;   - gfx700 keeps the offset on the load as 0xf4240 (= 4000000/4);
;   - gfx1200 keeps it on the load as 0x3d0900.
126define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
127; GFX689-LABEL: test_sink_smem_offset_4000000:
128; GFX689:       ; %bb.0: ; %entry
129; GFX689-NEXT:    s_add_u32 s0, s0, 0x3d0900
130; GFX689-NEXT:    s_addc_u32 s1, s1, 0
131; GFX689-NEXT:  .LBB2_1: ; %loop
132; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
133; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
134; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
135; GFX689-NEXT:    s_add_i32 s2, s2, -1
136; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
137; GFX689-NEXT:    s_cbranch_scc1 .LBB2_1
138; GFX689-NEXT:  ; %bb.2: ; %end
139; GFX689-NEXT:    s_endpgm
140;
141; GFX7-LABEL: test_sink_smem_offset_4000000:
142; GFX7:       ; %bb.0: ; %entry
143; GFX7-NEXT:  .LBB2_1: ; %loop
144; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
145; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
146; GFX7-NEXT:    s_load_dword s3, s[0:1], 0xf4240
147; GFX7-NEXT:    s_add_i32 s2, s2, -1
148; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
149; GFX7-NEXT:    s_cbranch_scc1 .LBB2_1
150; GFX7-NEXT:  ; %bb.2: ; %end
151; GFX7-NEXT:    s_endpgm
152;
153; GFX12-LABEL: test_sink_smem_offset_4000000:
154; GFX12:       ; %bb.0: ; %entry
155; GFX12-NEXT:  .LBB2_1: ; %loop
156; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
157; GFX12-NEXT:    s_wait_kmcnt 0x0
158; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x3d0900
159; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
160; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
161; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
162; GFX12-NEXT:    s_cbranch_scc1 .LBB2_1
163; GFX12-NEXT:  ; %bb.2: ; %end
164; GFX12-NEXT:    s_endpgm
165entry:
  ; The address is formed here, outside the loop that uses it.
166  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
167  br label %loop
168
169loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
170  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
171  %dec = sub i32 %count, 1
172  %load = load volatile i32, ptr addrspace(4) %gep
173  %cond = icmp eq i32 %dec, 0
174  br i1 %cond, label %end, label %loop
175
176end:
177  ret void
178}
179
; Byte offset 40000000 from an inreg addrspace(4) pointer, used by a volatile
; load inside the loop. According to the assertions below:
;   - gfx600, gfx801 and gfx900 add 0x2625a00 (= 40000000) to s[0:1] with an
;     s_add_u32/s_addc_u32 pair ahead of the loop and load with a zero immediate;
;   - gfx700 keeps the offset on the load as 0x989680 (= 40000000/4);
;   - gfx1200 no longer keeps the offset on the load: it uses a single
;     s_add_nc_u64 ahead of the loop and loads with a zero immediate.
180define amdgpu_cs void @test_sink_smem_offset_40000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
181; GFX689-LABEL: test_sink_smem_offset_40000000:
182; GFX689:       ; %bb.0: ; %entry
183; GFX689-NEXT:    s_add_u32 s0, s0, 0x2625a00
184; GFX689-NEXT:    s_addc_u32 s1, s1, 0
185; GFX689-NEXT:  .LBB3_1: ; %loop
186; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
187; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
188; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
189; GFX689-NEXT:    s_add_i32 s2, s2, -1
190; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
191; GFX689-NEXT:    s_cbranch_scc1 .LBB3_1
192; GFX689-NEXT:  ; %bb.2: ; %end
193; GFX689-NEXT:    s_endpgm
194;
195; GFX7-LABEL: test_sink_smem_offset_40000000:
196; GFX7:       ; %bb.0: ; %entry
197; GFX7-NEXT:  .LBB3_1: ; %loop
198; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
199; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
200; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x989680
201; GFX7-NEXT:    s_add_i32 s2, s2, -1
202; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
203; GFX7-NEXT:    s_cbranch_scc1 .LBB3_1
204; GFX7-NEXT:  ; %bb.2: ; %end
205; GFX7-NEXT:    s_endpgm
206;
207; GFX12-LABEL: test_sink_smem_offset_40000000:
208; GFX12:       ; %bb.0: ; %entry
209; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x2625a00
210; GFX12-NEXT:  .LBB3_1: ; %loop
211; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
212; GFX12-NEXT:    s_wait_kmcnt 0x0
213; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
214; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
215; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
216; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
217; GFX12-NEXT:    s_cbranch_scc1 .LBB3_1
218; GFX12-NEXT:  ; %bb.2: ; %end
219; GFX12-NEXT:    s_endpgm
220entry:
  ; The address is formed here, outside the loop that uses it.
221  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000
222  br label %loop
223
224loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
225  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
226  %dec = sub i32 %count, 1
227  %load = load volatile i32, ptr addrspace(4) %gep
228  %cond = icmp eq i32 %dec, 0
229  br i1 %cond, label %end, label %loop
230
231end:
232  ret void
233}
234
; Byte offset 40000000000 (0x9502f9000, wider than 32 bits) from an inreg
; addrspace(4) pointer, used by a volatile load inside the loop.
; No run line keeps this offset on the load; all of them add it to s[0:1]
; ahead of the loop and load with a zero immediate:
;   - gfx600/gfx700/gfx801/gfx900 add 0x502f9000 to the low half and 9 (with
;     carry) to the high half;
;   - gfx1200 first places the same two halves in s4/s5 and then issues one
;     s_add_nc_u64 with s[4:5] as the second operand.
235define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
236; GFX6789-LABEL: test_sink_smem_offset_40000000000:
237; GFX6789:       ; %bb.0: ; %entry
238; GFX6789-NEXT:    s_add_u32 s0, s0, 0x502f9000
239; GFX6789-NEXT:    s_addc_u32 s1, s1, 9
240; GFX6789-NEXT:  .LBB4_1: ; %loop
241; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
242; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
244; GFX6789-NEXT:    s_add_i32 s2, s2, -1
245; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
246; GFX6789-NEXT:    s_cbranch_scc1 .LBB4_1
247; GFX6789-NEXT:  ; %bb.2: ; %end
248; GFX6789-NEXT:    s_endpgm
249;
250; GFX12-LABEL: test_sink_smem_offset_40000000000:
251; GFX12:       ; %bb.0: ; %entry
252; GFX12-NEXT:    s_mov_b32 s4, 0x502f9000
253; GFX12-NEXT:    s_mov_b32 s5, 9
254; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
255; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
256; GFX12-NEXT:  .LBB4_1: ; %loop
257; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
258; GFX12-NEXT:    s_wait_kmcnt 0x0
259; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
260; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
261; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
262; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
263; GFX12-NEXT:    s_cbranch_scc1 .LBB4_1
264; GFX12-NEXT:  ; %bb.2: ; %end
265; GFX12-NEXT:    s_endpgm
266entry:
  ; The address is formed here, outside the loop that uses it.
267  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
268  br label %loop
269
270loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
271  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
272  %dec = sub i32 %count, 1
273  %load = load volatile i32, ptr addrspace(4) %gep
274  %cond = icmp eq i32 %dec, 0
275  br i1 %cond, label %end, label %loop
276
277end:
278  ret void
279}
280
; Negative byte offset (-400) from an inreg addrspace(4) pointer, used by a
; volatile load inside the loop.
; No run line keeps this offset on the load; all of them adjust s[0:1] ahead
; of the loop and load with a zero immediate:
;   - gfx600/gfx700/gfx801/gfx900 add 0xfffffe70 to the low half and -1 (with
;     carry) to the high half, i.e. the 64-bit two's-complement form of -400;
;   - gfx1200 builds the constant in s4/s5 (s_movk_i32 with 0xfe70, s_mov_b32
;     with -1) and then issues one s_add_nc_u64.
; NOTE(review): presumably s_movk_i32 sign-extends 0xfe70 so that s4 matches
; the 0xfffffe70 used on the older targets - confirm against the ISA docs.
281define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
282; GFX6789-LABEL: test_sink_smem_offset_neg400:
283; GFX6789:       ; %bb.0: ; %entry
284; GFX6789-NEXT:    s_add_u32 s0, s0, 0xfffffe70
285; GFX6789-NEXT:    s_addc_u32 s1, s1, -1
286; GFX6789-NEXT:  .LBB5_1: ; %loop
287; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
288; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
289; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
290; GFX6789-NEXT:    s_add_i32 s2, s2, -1
291; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
292; GFX6789-NEXT:    s_cbranch_scc1 .LBB5_1
293; GFX6789-NEXT:  ; %bb.2: ; %end
294; GFX6789-NEXT:    s_endpgm
295;
296; GFX12-LABEL: test_sink_smem_offset_neg400:
297; GFX12:       ; %bb.0: ; %entry
298; GFX12-NEXT:    s_movk_i32 s4, 0xfe70
299; GFX12-NEXT:    s_mov_b32 s5, -1
300; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
301; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
302; GFX12-NEXT:  .LBB5_1: ; %loop
303; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
304; GFX12-NEXT:    s_wait_kmcnt 0x0
305; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
306; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
307; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
308; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
309; GFX12-NEXT:    s_cbranch_scc1 .LBB5_1
310; GFX12-NEXT:  ; %bb.2: ; %end
311; GFX12-NEXT:    s_endpgm
312entry:
  ; The address is formed here, outside the loop that uses it.
313  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
314  br label %loop
315
316loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
317  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
318  %dec = sub i32 %count, 1
319  %load = load volatile i32, ptr addrspace(4) %gep
320  %cond = icmp eq i32 %dec, 0
321  br i1 %cond, label %end, label %loop
322
323end:
324  ret void
325}
326
327; Same for address space 6, constant 32-bit.
; The pointer argument is a single inreg value here (s0), so %val arrives in
; s1 and the loop counter in the assertions is s1 rather than s2.
; Every run line applies the -400 offset ahead of the loop with one 32-bit add
; of 0xfffffe70 into s2, sets s3 to 0, and loads from s[2:3] with a zero
; immediate. gfx600-gfx900 use s_add_i32; gfx1200 uses s_add_co_i32.
328define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
329; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
330; GFX6789:       ; %bb.0: ; %entry
331; GFX6789-NEXT:    s_add_i32 s2, s0, 0xfffffe70
332; GFX6789-NEXT:    s_mov_b32 s3, 0
333; GFX6789-NEXT:  .LBB6_1: ; %loop
334; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
335; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX6789-NEXT:    s_load_dword s0, s[2:3], 0x0
337; GFX6789-NEXT:    s_add_i32 s1, s1, -1
338; GFX6789-NEXT:    s_cmp_lg_u32 s1, 0
339; GFX6789-NEXT:    s_cbranch_scc1 .LBB6_1
340; GFX6789-NEXT:  ; %bb.2: ; %end
341; GFX6789-NEXT:    s_endpgm
342;
343; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
344; GFX12:       ; %bb.0: ; %entry
345; GFX12-NEXT:    s_add_co_i32 s2, s0, 0xfffffe70
346; GFX12-NEXT:    s_mov_b32 s3, 0
347; GFX12-NEXT:  .LBB6_1: ; %loop
348; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
349; GFX12-NEXT:    s_wait_kmcnt 0x0
350; GFX12-NEXT:    s_load_b32 s0, s[2:3], 0x0
351; GFX12-NEXT:    s_add_co_i32 s1, s1, -1
352; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
353; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
354; GFX12-NEXT:    s_cbranch_scc1 .LBB6_1
355; GFX12-NEXT:  ; %bb.2: ; %end
356; GFX12-NEXT:    s_endpgm
357entry:
  ; The address is formed here, outside the loop that uses it. The GEP index
  ; is written as i64 even though the pointer is in addrspace(6).
358  %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
359  br label %loop
360
361loop:
  ; Counts down from %val by one per iteration and exits to %end at zero.
  ; The loaded value is never used; the access is marked volatile.
362  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
363  %dec = sub i32 %count, 1
364  %load = load volatile i32, ptr addrspace(6) %gep
365  %cond = icmp eq i32 %dec, 0
366  br i1 %cond, label %end, label %loop
367
368end:
369  ret void
370}
371
372;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
373; GFX678: {{.*}}
374; GFX9: {{.*}}
375