xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll (revision 3aef525aa4b9a5395b6ac4ae771e28e64b27a126)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
6
7; Scalar data prefetch
8
; Baseline scalar data prefetch: the pointer is an `inreg` argument and is
; prefetched directly, with no address arithmetic. The last llvm.prefetch
; argument is 1, which in this file pairs with s_prefetch_data.
; The gfx1200 runs expect one s_prefetch_data on s[0:1] with a zero immediate
; offset; the gfx1100 runs expect no prefetch instruction, only s_endpgm.
9define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
10; GFX12-LABEL: prefetch_data_sgpr:
11; GFX12:       ; %bb.0: ; %entry
12; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
13; GFX12-NEXT:    s_endpgm
14;
15; GFX11-LABEL: prefetch_data_sgpr:
16; GFX11:       ; %bb.0: ; %entry
17; GFX11-NEXT:    s_endpgm
18entry:
19  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
20  ret void
21}
22
; Data prefetch of a pointer advanced by a small constant. The GEP steps 128
; `float` elements, i.e. 128 * 4 = 512 bytes, and the gfx1200 checks expect
; that value to appear as the immediate offset 0x200 of s_prefetch_data rather
; than as a separate add. The gfx1100 runs expect only s_endpgm.
23define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
24; GFX12-LABEL: prefetch_data_sgpr_offset:
25; GFX12:       ; %bb.0: ; %entry
26; GFX12-NEXT:    s_prefetch_data s[0:1], 0x200, null, 0
27; GFX12-NEXT:    s_endpgm
28;
29; GFX11-LABEL: prefetch_data_sgpr_offset:
30; GFX11:       ; %bb.0: ; %entry
31; GFX11-NEXT:    s_endpgm
32entry:
33  %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
34  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
35  ret void
36}
37
38; Check large offsets
39
; Data prefetch with a byte offset of 8388607 (0x7fffff). The gfx1200 checks
; expect the whole offset to be carried in the immediate field of
; s_prefetch_data, with no extra address computation. The sibling test
; prefetch_data_sgpr_too_large_offset uses the next value up (0x800000) and
; expects it NOT to be folded, so this is the upper boundary case.
; The gfx1100 runs expect only s_endpgm.
40define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
41; GFX12-LABEL: prefetch_data_sgpr_max_offset:
42; GFX12:       ; %bb.0: ; %entry
43; GFX12-NEXT:    s_prefetch_data s[0:1], 0x7fffff, null, 0
44; GFX12-NEXT:    s_endpgm
45;
46; GFX11-LABEL: prefetch_data_sgpr_max_offset:
47; GFX11:       ; %bb.0: ; %entry
48; GFX11-NEXT:    s_endpgm
49entry:
50  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
51  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
52  ret void
53}
54
; Data prefetch with a negative byte offset of -8388608 (-0x800000). The
; checks expect the offset NOT to be folded into the instruction: the base is
; adjusted first and s_prefetch_data is then issued with a zero immediate.
; The two gfx1200 selectors differ in how they do the 64-bit add, hence the
; separate SDAG and GISEL prefixes:
;   - SelectionDAG materializes the sign-extended constant in s[2:3]
;     (0xff800000 / -1) and uses a single s_add_nc_u64.
;   - GlobalISel uses an s_add_co_u32 / s_add_co_ci_u32 pair on the two halves.
; The gfx1100 runs expect only s_endpgm.
55define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
56; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset:
57; GFX12-SDAG:       ; %bb.0: ; %entry
58; GFX12-SDAG-NEXT:    s_mov_b32 s2, 0xff800000
59; GFX12-SDAG-NEXT:    s_mov_b32 s3, -1
60; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
61; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
62; GFX12-SDAG-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
63; GFX12-SDAG-NEXT:    s_endpgm
64;
65; GFX11-LABEL: prefetch_data_sgpr_min_offset:
66; GFX11:       ; %bb.0: ; %entry
67; GFX11-NEXT:    s_endpgm
68;
69; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset:
70; GFX12-GISEL:       ; %bb.0: ; %entry
71; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xff800000
72; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
73; GFX12-GISEL-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
74; GFX12-GISEL-NEXT:    s_endpgm
75entry:
76  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
77  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
78  ret void
79}
80
; Data prefetch with a byte offset of 8388608 (0x800000), one more than the
; 0x7fffff used by prefetch_data_sgpr_max_offset. The checks expect the offset
; to be added to the base pointer up front and s_prefetch_data to be issued
; with a zero immediate:
;   - SelectionDAG: one s_add_nc_u64 with the constant as a literal operand.
;   - GlobalISel: s_add_co_u32 / s_add_co_ci_u32 on the low and high halves.
; The gfx1100 runs expect only s_endpgm.
81define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
82; GFX12-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
83; GFX12-SDAG:       ; %bb.0: ; %entry
84; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x800000
85; GFX12-SDAG-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
86; GFX12-SDAG-NEXT:    s_endpgm
87;
88; GFX11-LABEL: prefetch_data_sgpr_too_large_offset:
89; GFX11:       ; %bb.0: ; %entry
90; GFX11-NEXT:    s_endpgm
91;
92; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
93; GFX12-GISEL:       ; %bb.0: ; %entry
94; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800000
95; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
96; GFX12-GISEL-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
97; GFX12-GISEL-NEXT:    s_endpgm
98entry:
99  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
100  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
101  ret void
102}
103
104; Check divergent address
105
; Divergent-address case: unlike the other tests in this file, the pointer
; argument is NOT marked `inreg`. The shared GCN prefix is used by all four
; RUN lines, so every configuration (gfx1200 and gfx1100, both selectors) is
; expected to emit no prefetch instruction and leave only s_endpgm.
106define amdgpu_ps void @prefetch_data_vgpr(ptr addrspace(1) %ptr) {
107; GCN-LABEL: prefetch_data_vgpr:
108; GCN:       ; %bb.0: ; %entry
109; GCN-NEXT:    s_endpgm
110entry:
111  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
112  ret void
113}
114
115; Check LDS and Scratch; neither can be prefetched
116
; Prefetch of an LDS pointer (addrspace(3)) passed `inreg`. Even though the
; address is in a scalar register, every RUN configuration (shared GCN prefix)
; is expected to emit no prefetch instruction, only s_endpgm.
117define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
118; GCN-LABEL: prefetch_data_lds:
119; GCN:       ; %bb.0: ; %entry
120; GCN-NEXT:    s_endpgm
121entry:
122  tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
123  ret void
124}
125
; Prefetch of a scratch pointer (addrspace(5)) passed `inreg`. As with the LDS
; case, every RUN configuration (shared GCN prefix) is expected to emit no
; prefetch instruction, only s_endpgm.
126define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
127; GCN-LABEL: prefetch_data_scratch:
128; GCN:       ; %bb.0: ; %entry
129; GCN-NEXT:    s_endpgm
130entry:
131  tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
132  ret void
133}
134
135; Check supported address spaces
136
; Data prefetch through a flat pointer (plain `ptr`, no addrspace qualifier)
; passed `inreg`. The gfx1200 runs expect the same s_prefetch_data on s[0:1]
; as for the addrspace(4) baseline; the gfx1100 runs expect only s_endpgm.
; Note the intrinsic is spelled with the `.pf` suffix here, matching the
; declaration at the end of the file.
137define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
138; GFX12-LABEL: prefetch_data_sgpr_flat:
139; GFX12:       ; %bb.0: ; %entry
140; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
141; GFX12-NEXT:    s_endpgm
142;
143; GFX11-LABEL: prefetch_data_sgpr_flat:
144; GFX11:       ; %bb.0: ; %entry
145; GFX11-NEXT:    s_endpgm
146entry:
147  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
148  ret void
149}
150
; Data prefetch through a global pointer (addrspace(1)) passed `inreg`. This
; is the uniform counterpart of prefetch_data_vgpr, which uses the same
; address space without `inreg`: here the gfx1200 runs do expect an
; s_prefetch_data on s[0:1]. The gfx1100 runs expect only s_endpgm.
151define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
152; GFX12-LABEL: prefetch_data_sgpr_global:
153; GFX12:       ; %bb.0: ; %entry
154; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
155; GFX12-NEXT:    s_endpgm
156;
157; GFX11-LABEL: prefetch_data_sgpr_global:
158; GFX11:       ; %bb.0: ; %entry
159; GFX11-NEXT:    s_endpgm
160entry:
161  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
162  ret void
163}
164
; Data prefetch through an addrspace(6) pointer (the "constant 32-bit" space
; per the function name) passed `inreg`. The gfx1200 checks expect s1 to be
; set to 0 before the prefetch, so the s[0:1] pair handed to s_prefetch_data
; is the incoming 32-bit value in s0 with a zero upper half.
; The gfx1100 runs expect only s_endpgm.
165define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
166; GFX12-LABEL: prefetch_data_sgpr_constant_32bit:
167; GFX12:       ; %bb.0: ; %entry
168; GFX12-NEXT:    s_mov_b32 s1, 0
169; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
170; GFX12-NEXT:    s_endpgm
171;
172; GFX11-LABEL: prefetch_data_sgpr_constant_32bit:
173; GFX11:       ; %bb.0: ; %entry
174; GFX11-NEXT:    s_endpgm
175entry:
176  tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
177  ret void
178}
179
180; Instruction cache (I$) prefetch
181
; Baseline instruction prefetch. Identical to prefetch_data_sgpr except that
; the last llvm.prefetch argument is 0 instead of 1, and the gfx1200 checks
; correspondingly expect s_prefetch_inst instead of s_prefetch_data.
; The gfx1100 runs expect only s_endpgm.
182define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
183; GFX12-LABEL: prefetch_inst_sgpr:
184; GFX12:       ; %bb.0: ; %entry
185; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
186; GFX12-NEXT:    s_endpgm
187;
188; GFX11-LABEL: prefetch_inst_sgpr:
189; GFX11:       ; %bb.0: ; %entry
190; GFX11-NEXT:    s_endpgm
191entry:
192  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
193  ret void
194}
195
; Instruction prefetch of a pointer advanced by 128 bytes (the GEP is over
; i8, so the element count is the byte offset). The gfx1200 checks expect the
; offset to appear as the immediate 0x80 of s_prefetch_inst.
; The gfx1100 runs expect only s_endpgm.
196define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
197; GFX12-LABEL: prefetch_inst_sgpr_offset:
198; GFX12:       ; %bb.0: ; %entry
199; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x80, null, 0
200; GFX12-NEXT:    s_endpgm
201;
202; GFX11-LABEL: prefetch_inst_sgpr_offset:
203; GFX11:       ; %bb.0: ; %entry
204; GFX11-NEXT:    s_endpgm
205entry:
206  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
207  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
208  ret void
209}
210
211; Check large offsets
212
; Instruction prefetch with a byte offset of 8388607 (0x7fffff). The gfx1200
; checks expect the whole offset in the immediate field of s_prefetch_inst,
; with no separate address computation. The sibling test
; prefetch_inst_sgpr_too_large_offset covers the next value up (0x800000),
; which is expected not to be folded. The gfx1100 runs expect only s_endpgm.
213define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
214; GFX12-LABEL: prefetch_inst_sgpr_max_offset:
215; GFX12:       ; %bb.0: ; %entry
216; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x7fffff, null, 0
217; GFX12-NEXT:    s_endpgm
218;
219; GFX11-LABEL: prefetch_inst_sgpr_max_offset:
220; GFX11:       ; %bb.0: ; %entry
221; GFX11-NEXT:    s_endpgm
222entry:
223  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
224  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
225  ret void
226}
227
; Instruction prefetch with a negative byte offset of -8388608 (-0x800000).
; The checks expect the offset NOT to be folded into s_prefetch_inst: the base
; is adjusted first and the prefetch is issued with a zero immediate.
; The two gfx1200 selectors produce different add sequences:
;   - SelectionDAG materializes the sign-extended constant in s[2:3]
;     (0xff800000 / -1) and uses a single s_add_nc_u64.
;   - GlobalISel uses an s_add_co_u32 / s_add_co_ci_u32 pair on the two halves.
; The gfx1100 runs expect only s_endpgm.
228define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
229; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
230; GFX12-SDAG:       ; %bb.0: ; %entry
231; GFX12-SDAG-NEXT:    s_mov_b32 s2, 0xff800000
232; GFX12-SDAG-NEXT:    s_mov_b32 s3, -1
233; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
234; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
235; GFX12-SDAG-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
236; GFX12-SDAG-NEXT:    s_endpgm
237;
238; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
239; GFX11:       ; %bb.0: ; %entry
240; GFX11-NEXT:    s_endpgm
241;
242; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
243; GFX12-GISEL:       ; %bb.0: ; %entry
244; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xff800000
245; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
246; GFX12-GISEL-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
247; GFX12-GISEL-NEXT:    s_endpgm
248entry:
249  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
250  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
251  ret void
252}
253
; Instruction prefetch with a byte offset of 8388608 (0x800000), one more than
; the 0x7fffff used by prefetch_inst_sgpr_max_offset. The checks expect the
; offset to be added to the base pointer up front and s_prefetch_inst to be
; issued with a zero immediate:
;   - SelectionDAG: one s_add_nc_u64 with the constant as a literal operand.
;   - GlobalISel: s_add_co_u32 / s_add_co_ci_u32 on the low and high halves.
; The gfx1100 runs expect only s_endpgm.
254define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
255; GFX12-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
256; GFX12-SDAG:       ; %bb.0: ; %entry
257; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x800000
258; GFX12-SDAG-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
259; GFX12-SDAG-NEXT:    s_endpgm
260;
261; GFX11-LABEL: prefetch_inst_sgpr_too_large_offset:
262; GFX11:       ; %bb.0: ; %entry
263; GFX11-NEXT:    s_endpgm
264;
265; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
266; GFX12-GISEL:       ; %bb.0: ; %entry
267; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800000
268; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
269; GFX12-GISEL-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
270; GFX12-GISEL-NEXT:    s_endpgm
271entry:
272  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
273  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
274  ret void
275}
276
; Declarations of the llvm.prefetch overloads called by the tests above, one
; per pointer type exercised in this file. The `.pN` variants take
; `ptr addrspace(N)`; the `.pf` variant is the one used with a plain `ptr`
; (see prefetch_data_sgpr_flat). All take the pointer followed by three i32
; operands.
277declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
278declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
279declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
280declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
281declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
282declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)
283