; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,CIVI,CIVI-HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
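
; This file tests code generation for flat (generic, address space 0) memory
; accesses: flat pointers produced by addrspacecast from global (addrspace 1)
; and private (addrspace 5) pointers, extending and truncating flat accesses,
; and folding of immediate offsets into flat instructions.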

; GCN-LABEL: {{^}}store_flat_i32:
; GCN-DAG: s_load_{{dwordx2|b64}} s[[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; GCN-DAG: s_load_{{dword|b32}} s[[SDATA:[0-9]+]],
; GCN: s_waitcnt lgkmcnt(0)
; GCN-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; GCN: flat_store_{{dword|b32}} v[[[LO_VREG]]:[[HI_VREG]]], v[[DATA]]
define amdgpu_kernel void @store_flat_i32(ptr addrspace(1) %gptr, i32 %x) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  store volatile i32 %x, ptr %fptr, align 4
  ret void
}

; GCN-LABEL: {{^}}store_flat_i64:
; GCN: flat_store_{{dwordx2|b64}}
define amdgpu_kernel void @store_flat_i64(ptr addrspace(1) %gptr, i64 %x) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  store volatile i64 %x, ptr %fptr, align 8
  ret void
}

; GCN-LABEL: {{^}}store_flat_v4i32:
; GCN: flat_store_{{dwordx4|b128}}
define amdgpu_kernel void @store_flat_v4i32(ptr addrspace(1) %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  store volatile <4 x i32> %x, ptr %fptr, align 16
  ret void
}

; GCN-LABEL: {{^}}store_flat_trunc_i16:
; GCN: flat_store_{{short|b16}}
define amdgpu_kernel void @store_flat_trunc_i16(ptr addrspace(1) %gptr, i32 %x) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %y = trunc i32 %x to i16
  store volatile i16 %y, ptr %fptr, align 2
  ret void
}

; GCN-LABEL: {{^}}store_flat_trunc_i8:
; GCN: flat_store_{{byte|b8}}
define amdgpu_kernel void @store_flat_trunc_i8(ptr addrspace(1) %gptr, i32 %x) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %y = trunc i32 %x to i8
  store volatile i8 %y, ptr %fptr, align 2
  ret void
}

; GCN-LABEL: load_flat_i32:
; GCN: flat_load_{{dword|b32}}
define amdgpu_kernel void @load_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i32, ptr %fptr, align 4
  store i32 %fload, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: load_flat_i64:
; GCN: flat_load_{{dwordx2|b64}}
define amdgpu_kernel void @load_flat_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i64, ptr %fptr, align 8
  store i64 %fload, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: load_flat_v4i32:
; GCN: flat_load_{{dwordx4|b128}}
define amdgpu_kernel void @load_flat_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile <4 x i32>, ptr %fptr, align 32
  store <4 x i32> %fload, ptr addrspace(1) %out, align 8
  ret void
}

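; Extending loads: sign extension selects the signed narrow load variants
; (flat_load_sbyte/sshort on CI-GFX10, flat_load_i8/i16 on GFX11), and zero
; extension the unsigned ones (flat_load_ubyte/ushort, flat_load_u8/u16).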
; GCN-LABEL: sextload_flat_i8:
; GCN: flat_load_{{sbyte|i8}}
define amdgpu_kernel void @sextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i8, ptr %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: zextload_flat_i8:
; GCN: flat_load_{{ubyte|u8}}
define amdgpu_kernel void @zextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i8, ptr %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: sextload_flat_i16:
; GCN: flat_load_{{sshort|i16}}
define amdgpu_kernel void @sextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i16, ptr %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: zextload_flat_i16:
; GCN: flat_load_{{ushort|u16}}
define amdgpu_kernel void @zextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
  %fptr = addrspacecast ptr addrspace(1) %gptr to ptr
  %fload = load volatile i16, ptr %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

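; The following tests use flat pointers cast from private (addrspace 5)
; allocas, so the selected flat instructions access the scratch aperture and
; rely on flat_scratch being initialized.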
; GCN-LABEL: flat_scratch_unaligned_load:
; GFX9: flat_load_dword
; GFX10PLUS: flat_load_{{dword|b32}}
define amdgpu_kernel void @flat_scratch_unaligned_load() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast ptr addrspace(5) %scratch to ptr
  store volatile ptr %fptr, ptr addrspace(3) null
  %ld = load volatile i32, ptr %fptr, align 1
  ret void
}

; GCN-LABEL: flat_scratch_unaligned_store:
; GFX9: flat_store_dword
; GFX10PLUS: flat_store_{{dword|b32}}
define amdgpu_kernel void @flat_scratch_unaligned_store() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast ptr addrspace(5) %scratch to ptr
  store volatile ptr %fptr, ptr addrspace(3) null
  store volatile i32 0, ptr %fptr, align 1
  ret void
}

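; Multi-dword flat accesses to scratch: CI/VI split them into individual
; dword accesses, while GFX9 and later select the wider dwordx2/b64 forms.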
; GCN-LABEL: flat_scratch_multidword_load_kernel:
; CIVI-HSA: flat_load_dword v
; CIVI-HSA: flat_load_dword v
; GFX9:  flat_load_dwordx2
; GFX10PLUS: flat_load_{{dwordx2|b64}}
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load_kernel() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast ptr addrspace(5) %scratch to ptr
  %ld = load volatile <2 x i32>, ptr %fptr
  ret void
}

; GCN-LABEL: flat_scratch_multidword_load_func:
; CIVI-HSA: flat_load_dword v
; CIVI-HSA: flat_load_dword v
; GFX9:  flat_load_dwordx2
; GFX10PLUS: flat_load_{{dwordx2|b64}}
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define <2 x i32> @flat_scratch_multidword_load_func(ptr %maybe.scratch) {
  %load = load <2 x i32>, ptr %maybe.scratch
  ret <2 x i32> %load
}

; GCN-LABEL: flat_scratch_multidword_store_kernel:
; CIVI-HSA: flat_store_dword v
; CIVI-HSA: flat_store_dword v
; GFX9:  flat_store_dwordx2
; GFX10PLUS: flat_store_{{dwordx2|b64}}
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store_kernel() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast ptr addrspace(5) %scratch to ptr
  store volatile <2 x i32> zeroinitializer, ptr %fptr
  ret void
}

; GCN-LABEL: flat_scratch_multidword_store_func:
; CIVI-HSA: flat_store_dword v
; CIVI-HSA: flat_store_dword v
; GFX9:  flat_store_dwordx2
; GFX10PLUS: flat_store_{{dwordx2|b64}}
define void @flat_scratch_multidword_store_func(ptr %maybe.scratch) {
  store <2 x i32> zeroinitializer, ptr %maybe.scratch
  ret void
}

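; Immediate offset folding: GFX9 flat instructions accept an unsigned
; immediate offset of up to 4095, so an offset of 4095 folds into the
; instruction while 4096 and negative offsets require the address to be
; materialized with explicit adds. CI/VI flat instructions take no immediate
; offset, and the GFX10 checks below likewise expect none, while GFX11 folds
; the offset again.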
; GCN-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset(ptr %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095
  store volatile i8 %x, ptr %fptr.offset
  ret void
}

; GCN-LABEL: {{^}}store_flat_i8_max_offset_p1:
; GCN: flat_store_{{byte|b8}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{( dlc)?}}{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset_p1(ptr %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096
  store volatile i8 %x, ptr %fptr.offset
  ret void
}

; GCN-LABEL: {{^}}store_flat_i8_neg_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}

; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 -2
  store volatile i8 %x, ptr %fptr.offset
  ret void
}

; GCN-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc{{$}}
; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}}
; GFX11: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095
  %val = load volatile i8, ptr %fptr.offset
  ret void
}

; GCN-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GFX10PLUS: flat_load_{{ubyte|u8}} v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096
  %val = load volatile i8, ptr %fptr.offset
  ret void
}

; GCN-LABEL: {{^}}load_flat_i8_neg_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}

; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @load_flat_i8_neg_offset(ptr %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 -2
  %val = load volatile i8, ptr %fptr.offset
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }