; Source: llvm-project/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll (revision 2be0abb7fe72ed4537b3eabcd3102d48ea845717)
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Load/store vectorizer tests for accesses whose element type is a pointer,
; or a pointer mixed with integer, floating-point or vector elements.

; Per this layout, addrspace(1) pointers are 64 bits wide (p1:64:64) and
; addrspace(3) pointers are 32 bits wide (p3:32:32).
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Declared with attribute group #1; no function in this view calls it.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent ptr addrspace(1) loads from %b and two adjacent null-pointer
; stores to %a. The expected output accesses memory as <2 x i64> and turns
; each loaded lane back into a pointer with inttoptr.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
; CHECK: store <2 x i64> zeroinitializer
define amdgpu_kernel void @merge_v2p1i8(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
entry:
  ; Address of the element one pointer slot past each base.
  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1
  %b.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %b, i64 1

  %ld.c = load ptr addrspace(1), ptr addrspace(1) %b, align 4
  %ld.c.idx.1 = load ptr addrspace(1), ptr addrspace(1) %b.1, align 4

  store ptr addrspace(1) null, ptr addrspace(1) %a, align 4
  store ptr addrspace(1) null, ptr addrspace(1) %a.1, align 4

  ret void
}

; Same shape as the addrspace(1) variant, but with ptr addrspace(3) elements:
; two adjacent pointer loads and two adjacent null-pointer stores. The
; expected output uses <2 x i32> accesses and inttoptr from i32.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
; CHECK: store <2 x i32> zeroinitializer
define amdgpu_kernel void @merge_v2p3i8(ptr addrspace(3) nocapture %a, ptr addrspace(3) nocapture readonly %b) #0 {
entry:
  ; Address of the element one pointer slot past each base.
  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 1
  %b.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 1

  %ld.c = load ptr addrspace(3), ptr addrspace(3) %b, align 4
  %ld.c.idx.1 = load ptr addrspace(3), ptr addrspace(3) %b.1, align 4

  store ptr addrspace(3) null, ptr addrspace(3) %a, align 4
  store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 4

  ret void
}

; Mixed element kinds in addrspace(3): an i32 at slot 0, a ptr addrspace(3)
; at slot 1 and a <2 x i32> starting at slot 2 (slots are ptr addrspace(3)
; sized). The expected output is a single <4 x i32> load and a single
; <4 x i32> store.
; CHECK-LABEL: @merge_ptr_i32(
; CHECK: load <4 x i32>
; CHECK: store <4 x i32>
define amdgpu_kernel void @merge_ptr_i32(ptr addrspace(3) nocapture %a, ptr addrspace(3) nocapture readonly %b) #0 {
entry:
  ; Store addresses at pointer slots 0, 1 and 2 of %a.
  %a.0 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 0
  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 1
  %a.2 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 2

  ; Load addresses at pointer slots 0, 1 and 2 of %b.
  %b.0 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 0
  %b.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 1
  %b.2 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 2

  %ld.0 = load i32, ptr addrspace(3) %b.0, align 16
  %ld.1 = load ptr addrspace(3), ptr addrspace(3) %b.1, align 4
  %ld.2 = load <2 x i32>, ptr addrspace(3) %b.2, align 8

  store i32 0, ptr addrspace(3) %a.0, align 16
  store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 4
  store <2 x i32> <i32 0, i32 0>, ptr addrspace(3) %a.2, align 8

  ret void
}

; Variant of the mixed-element test with the vector first: a <2 x i32> at
; slot 0, a ptr addrspace(3) at slot 2 and an i32 at slot 3 (slots are
; ptr addrspace(3) sized). The expected output is a single <4 x i32> load
; and a single <4 x i32> store.
; CHECK-LABEL: @merge_ptr_i32_vec_first(
; CHECK: load <4 x i32>
; CHECK: store <4 x i32>
define amdgpu_kernel void @merge_ptr_i32_vec_first(ptr addrspace(3) nocapture %a, ptr addrspace(3) nocapture readonly %b) #0 {
entry:
  ; Store addresses at pointer slots 0, 2 and 3 of %a.
  %a.0 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 0
  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 2
  %a.2 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 3

  ; Load addresses at pointer slots 0, 2 and 3 of %b.
  %b.0 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 0
  %b.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 2
  %b.2 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 3

  %ld.0 = load <2 x i32>, ptr addrspace(3) %b.0, align 16
  %ld.1 = load ptr addrspace(3), ptr addrspace(3) %b.1, align 8
  %ld.2 = load i32, ptr addrspace(3) %b.2, align 4

  store <2 x i32> <i32 0, i32 0>, ptr addrspace(3) %a.0, align 16
  store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 8
  store i32 0, ptr addrspace(3) %a.2, align 4

  ret void
}

; Loads an i64 from %a followed by a ptr addrspace(1) from the next i64 slot.
; The expected output is one <2 x i64> load whose lane 1 is extracted and
; converted to a pointer with inttoptr.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_i64_ptr64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load i64, ptr addrspace(1) %a
  %ld.1 = load ptr addrspace(1), ptr addrspace(1) %a.1

  ret void
}

; Loads a ptr addrspace(1) from %a followed by an i64 from the next i64 slot.
; The expected output is one <2 x i64> load whose lane 0 is extracted and
; converted to a pointer with inttoptr.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_ptr64_i64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load ptr addrspace(1), ptr addrspace(1) %a
  %ld.1 = load i64, ptr addrspace(1) %a.1

  ret void
}

; Stores the pointer argument %ptr0 to %a and the i64 argument %val1 to the
; next i64 slot. The expected output converts %ptr0 with ptrtoint, inserts it
; into lane 0 of a <2 x i64>, and emits one <2 x i64> store.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_i64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, i64 %val1) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
  store i64 %val1, ptr addrspace(1) %a.1

  ret void
}

; Stores the i64 argument %val0 to %a and the pointer argument %ptr1 to the
; next ptr addrspace(1) slot. The expected output converts %ptr1 with
; ptrtoint, inserts it into lane 1 of a <2 x i64>, and emits one <2 x i64>
; store.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_i64_ptr64(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(1) %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1

  store i64 %val0, ptr addrspace(1) %a
  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.1

  ret void
}

; Loads an i32 from %a followed by a ptr addrspace(3) from the next i32 slot.
; The expected output is one <2 x i32> load whose lane 1 is extracted and
; converted to a pointer with inttoptr.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to ptr addrspace(3)
define amdgpu_kernel void @merge_load_i32_ptr32(ptr addrspace(3) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  ; No explicit alignment is given on either load.
  %ld.0 = load i32, ptr addrspace(3) %a
  %ld.1 = load ptr addrspace(3), ptr addrspace(3) %a.1

  ret void
}

; Loads a ptr addrspace(3) from %a followed by an i32 from the next i32 slot.
; The expected output is one <2 x i32> load whose lane 0 is extracted and
; converted to a pointer with inttoptr.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to ptr addrspace(3)
define amdgpu_kernel void @merge_load_ptr32_i32(ptr addrspace(3) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  ; No explicit alignment is given on either load.
  %ld.0 = load ptr addrspace(3), ptr addrspace(3) %a
  %ld.1 = load i32, ptr addrspace(3) %a.1

  ret void
}

; Stores the pointer argument %ptr0 to %a and the i32 argument %val1 to the
; next i32 slot. The expected output converts %ptr0 with ptrtoint, inserts it
; into lane 0 of a <2 x i32>, and emits one <2 x i32> store.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr0 to i32
; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_ptr32_i32(ptr addrspace(3) nocapture %a, ptr addrspace(3) %ptr0, i32 %val1) #0 {
entry:
  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1

  store ptr addrspace(3) %ptr0, ptr addrspace(3) %a
  store i32 %val1, ptr addrspace(3) %a.1

  ret void
}

; Stores the i32 argument %val0 to %a and the pointer argument %ptr1 to the
; next ptr addrspace(3) slot. The expected output converts %ptr1 with
; ptrtoint, inserts it into lane 1 of a <2 x i32>, and emits one <2 x i32>
; store.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_i32_ptr32(ptr addrspace(3) nocapture %a, i32 %val0, ptr addrspace(3) %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i32 1

  store i32 %val0, ptr addrspace(3) %a
  store ptr addrspace(3) %ptr1, ptr addrspace(3) %a.1

  ret void
}

; Negative test: a 32-bit ptr addrspace(3) value is stored at %a and an i64
; at the next i64 slot, both in addrspace(1) memory. The expected output
; keeps the two scalar stores, in the original order.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store ptr addrspace(3)
; CHECK: store i64
define amdgpu_kernel void @no_merge_store_ptr32_i64(ptr addrspace(1) nocapture %a, ptr addrspace(3) %ptr0, i64 %val1) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  store ptr addrspace(3) %ptr0, ptr addrspace(1) %a
  store i64 %val1, ptr addrspace(1) %a.1

  ret void
}

; Negative test: an i64 is stored at %a and a 32-bit ptr addrspace(3) value
; at %a.1, both in addrspace(1) memory. The expected output keeps the two
; scalar stores, in the original order.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store ptr addrspace(3)
define amdgpu_kernel void @no_merge_store_i64_ptr32(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(3) %ptr1) #0 {
entry:
  ; The GEP steps by one ptr addrspace(3) element (32 bits per the file's
  ; datalayout), not by one i64.
  ; NOTE(review): that puts the pointer store at byte offset 4, inside the
  ; 8 bytes written by the i64 store -- confirm the overlap is intended.
  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(1) %a, i64 1

  store i64 %val0, ptr addrspace(1) %a
  store ptr addrspace(3) %ptr1, ptr addrspace(1) %a.1

  ret void
}

; Negative test: an i64 is loaded from %a and a 32-bit ptr addrspace(3) value
; from the next i64 slot, both in addrspace(1) memory. The expected output
; keeps the two scalar loads, in the original order.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load ptr addrspace(3),
define amdgpu_kernel void @no_merge_load_i64_ptr32(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load i64, ptr addrspace(1) %a
  %ld.1 = load ptr addrspace(3), ptr addrspace(1) %a.1

  ret void
}

; Negative test: a 32-bit ptr addrspace(3) value is loaded from %a and an i64
; from the next i64 slot, both in addrspace(1) memory. The expected output
; keeps the two scalar loads, in the original order.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load ptr addrspace(3),
; CHECK: load i64,
define amdgpu_kernel void @no_merge_load_ptr32_i64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load ptr addrspace(3), ptr addrspace(1) %a
  %ld.1 = load i64, ptr addrspace(1) %a.1

  ret void
}

; Two adjacent <2 x ptr addrspace(1)> loads from %b and two adjacent
; <2 x ptr addrspace(1)> zero stores to %a.
; XXX - Despite the function name, these accesses are not merged into wider
; vectors; the reason is not recorded here. The expectations below pin the
; current output: two separate loads and two separate stores.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x ptr addrspace(1)>
; CHECK: load <2 x ptr addrspace(1)>
; CHECK: store <2 x ptr addrspace(1)>
; CHECK: store <2 x ptr addrspace(1)>
define amdgpu_kernel void @merge_v2p1i8_v2p1i8(ptr addrspace(1) nocapture noalias %a, ptr addrspace(1) nocapture readonly noalias %b) #0 {
entry:
  ; Address of the element one <2 x ptr addrspace(1)> past each base.
  %a.1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %a, i64 1
  %b.1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %b, i64 1

  %ld.c = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b, align 4
  %ld.c.idx.1 = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b.1, align 4

  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a, align 4
  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a.1, align 4
  ret void
}

; Loads a ptr addrspace(1) from %a followed by a double from the next double
; slot. The expected output is one <2 x i64> load; lane 0 is converted to a
; pointer with inttoptr and lane 1 is bitcast to double.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: [[ELT0_PTR:%[^ ]+]] = inttoptr i64 [[ELT0]] to ptr addrspace(1)
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: bitcast i64 [[ELT1]] to double
define amdgpu_kernel void @merge_load_ptr64_f64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load ptr addrspace(1), ptr addrspace(1) %a
  %ld.1 = load double, ptr addrspace(1) %a.1

  ret void
}

; Loads a double from %a followed by a ptr addrspace(1) from the next double
; slot. The expected output is one <2 x i64> load; lane 0 is bitcast to
; double and lane 1 is converted to a pointer with inttoptr.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
define amdgpu_kernel void @merge_load_f64_ptr64(ptr addrspace(1) nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  ; No explicit alignment is given on either load.
  %ld.0 = load double, ptr addrspace(1) %a
  %ld.1 = load ptr addrspace(1), ptr addrspace(1) %a.1

  ret void
}

; Stores the pointer argument %ptr0 to %a and the double argument %val1 to
; the next double slot. The expected output converts %ptr0 with ptrtoint
; (lane 0) and %val1 with bitcast (lane 1), then emits one <2 x i64> store.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_f64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, double %val1) #0 {
entry:
  %a.1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1

  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
  store double %val1, ptr addrspace(1) %a.1

  ret void
}

; Stores the double argument %val0 to %a and the pointer argument %ptr1 to
; the next ptr addrspace(1) slot. The expected output converts %val0 with
; bitcast (lane 0) and %ptr1 with ptrtoint (lane 1), then emits one
; <2 x i64> store.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_f64_ptr64(ptr addrspace(1) nocapture %a, double %val0, ptr addrspace(1) %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1

  store double %val0, ptr addrspace(1) %a
  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.1

  ret void
}

; Attribute groups: #0 is attached to the kernel definitions in this file,
; #1 to the @llvm.amdgcn.workitem.id.x declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
