; RUN: llc -mtriple=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test addressing modes when the scratch base is not a frame index.
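;
; Reading the checked MUBUF operands (a summary for orientation, not
; something the test itself spells out): in
;   buffer_store_byte v{{[0-9]+}}, off, s[12:15], 0 offset:8
; "off" means no VGPR address operand is used, s[12:15] holds the scratch
; buffer resource descriptor, the scalar offset (soffset) is 0, and the
; constant address 8 has been folded into the 12-bit unsigned immediate
; offset field (0..4095 on these targets). +max-private-element-size-16
; permits scratch accesses of up to 16 bytes, which is why the v4i32 cases
; below select a single dwordx4 access instead of being split into dwords.
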
; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
  store volatile i16 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
  store volatile i32 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
  store volatile <2 x i32> <i32 5, i32 10>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
  store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %sextload = sext i8 %load to i32
  store i32 %sextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %zextload = zext i8 %load to i32
  store i32 %zextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %sextload = sext i16 %load to i32
  store i32 %sextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %zextload = zext i16 %load to i32
  store i32 %zextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
  %load = load volatile i32, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
  %load = load volatile <2 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
  %load = load volatile <4 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[12:15], 0 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4095 to ptr addrspace(5))
  ret void
}

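; The MUBUF immediate offset field is 12 bits, so 4095 is the largest
; address that can be folded into it. Anything larger needs the address (or
; the part that won't fit) materialized in a VGPR and used with "offen":
; 4096 becomes a 0x1000 VGPR base with no immediate, and 4097 splits into
; the 0x1000 VGPR base plus an offset:1 immediate.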
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[12:15], 0 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4096 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[12:15], 0 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4097 to ptr addrspace(5))
  ret void
}

; MUBUF used for stack access has bounds checking enabled before gfx9, so a
; possibly negative base index can't be left in the vgpr address while the
; constant part of the address is folded into the immediate offset.
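;
; A worked example with illustrative numbers (not part of the original
; comment): the store below addresses %alloca + 4*(%vaddr + 8), i.e. a vgpr
; part of (%vaddr << 2) plus a constant 32. If %vaddr were -8, the shifted
; base would be 0xffffffe0 and the correct wrapped address is 0. Folding the
; 32 into the immediate ("offen offset:32") would hand the bounds check the
; huge unsigned base 0xffffffe0, so pre-gfx9 targets must do the +32 add in
; the vgpr first; gfx9 scratch accesses aren't range checked this way, so
; the immediate can be folded there.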

; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr:
; SICIVI: {{buffer|flat}}_load_dword [[VADDR:v[0-9]+]],
; SICIVI: v_lshlrev_b32_e32 [[ADDR:v[0-9]+]], 2, [[VADDR]]
; SICIVI-NOT: [[ADDR]]
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR]]
; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}

; GFX9: global_load_dword [[VADDR:v[0-9]+]],
; GFX9: v_lshlrev_b32_e32 [[ADDR:v[0-9]+]], 2, [[VADDR]]
; GFX9-NOT: [[ADDR]]
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen offset:32
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %vaddr = load volatile i32, ptr addrspace(1) undef
  %vaddr.off = add i32 %vaddr, 8
  %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %vaddr.off
  store volatile i32 9, ptr addrspace(5) %gep
  ret void
}

attributes #0 = { nounwind }