xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll (revision f9c1ede2543b37fabe9f2d8f8fed5073c475d850)
; Regression test for the load-store-vectorizer pass on the amdgcn-amd-amdhsa
; triple, exercising loads and stores whose element type is a pointer.
; The run line below emits textual IR (-S) to stdout and pipes it to FileCheck.
1; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2
; Pointer widths relied on by the tests: address space 1 pointers are 64 bits
; wide (p1:64:64) and address space 3 pointers are 32 bits wide (p3:32:32).
3target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
4
; NOTE(review): this intrinsic is declared but not called by any function in
; the portion of the file reviewed.
5declare i32 @llvm.amdgcn.workitem.id.x() #1
6
; Two consecutive ptr addrspace(1) loads from %b and two consecutive null
; pointer stores to %a (each element is 64 bits per p1:64:64).
; Expected output: one <2 x i64> load whose lanes are turned back into
; pointers with inttoptr, and one <2 x i64> zeroinitializer store.
7; CHECK-LABEL: @merge_v2p1i8(
8; CHECK: load <2 x i64>
9; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
10; CHECK: inttoptr i64 %{{[^ ]+}} to ptr addrspace(1)
11; CHECK: store <2 x i64> zeroinitializer
12define amdgpu_kernel void @merge_v2p1i8(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
13entry:
; %a.1 / %b.1 address the pointer-sized slot immediately after %a / %b.
14  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1
15  %b.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %b, i64 1
16
; The loaded values are never used; only the memory accesses matter here.
17  %ld.c = load ptr addrspace(1), ptr addrspace(1) %b, align 4
18  %ld.c.idx.1 = load ptr addrspace(1), ptr addrspace(1) %b.1, align 4
19
20  store ptr addrspace(1) null, ptr addrspace(1) %a, align 4
21  store ptr addrspace(1) null, ptr addrspace(1) %a.1, align 4
22
23  ret void
24}
25
; 32-bit counterpart of the two-pointer test: two consecutive
; ptr addrspace(3) loads from %b and two consecutive null pointer stores to %a
; (each element is 32 bits per p3:32:32).
; Expected output: one <2 x i32> load whose lanes are turned back into
; pointers with inttoptr, and one <2 x i32> zeroinitializer store.
26; CHECK-LABEL: @merge_v2p3i8(
27; CHECK: load <2 x i32>
28; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
29; CHECK: inttoptr i32 %{{[^ ]+}} to ptr addrspace(3)
30; CHECK: store <2 x i32> zeroinitializer
31define amdgpu_kernel void @merge_v2p3i8(ptr addrspace(3) nocapture %a, ptr addrspace(3) nocapture readonly %b) #0 {
32entry:
; %a.1 / %b.1 address the pointer-sized slot immediately after %a / %b.
33  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i64 1
34  %b.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %b, i64 1
35
; The loaded values are never used; only the memory accesses matter here.
36  %ld.c = load ptr addrspace(3), ptr addrspace(3) %b, align 4
37  %ld.c.idx.1 = load ptr addrspace(3), ptr addrspace(3) %b.1, align 4
38
39  store ptr addrspace(3) null, ptr addrspace(3) %a, align 4
40  store ptr addrspace(3) null, ptr addrspace(3) %a.1, align 4
41
42  ret void
43}
44
; Mixed element types of equal width: an i64 load at %a followed by a
; ptr addrspace(1) load from the next 8-byte slot.
; Expected output: one <2 x i64> load; lane 1 is extracted and converted to a
; pointer with inttoptr.
45; CHECK-LABEL: @merge_load_i64_ptr64(
46; CHECK: load <2 x i64>
47; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
48; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
49define amdgpu_kernel void @merge_load_i64_ptr64(ptr addrspace(1) nocapture %a) #0 {
50entry:
; %a.1 = %a + one i64 (8 bytes).
51  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
52
53  %ld.0 = load i64, ptr addrspace(1) %a
54  %ld.1 = load ptr addrspace(1), ptr addrspace(1) %a.1
55
56  ret void
57}
58
; Mixed element types of equal width: a ptr addrspace(1) load at %a followed
; by an i64 load from the next 8-byte slot.
; Expected output: one <2 x i64> load; lane 0 is extracted and converted to a
; pointer with inttoptr.
59; CHECK-LABEL: @merge_load_ptr64_i64(
60; CHECK: load <2 x i64>
61; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
62; CHECK: inttoptr i64 [[ELT0]] to ptr addrspace(1)
63define amdgpu_kernel void @merge_load_ptr64_i64(ptr addrspace(1) nocapture %a) #0 {
64entry:
; %a.1 = %a + one i64 (8 bytes).
65  %a.1 =  getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
66
67  %ld.0 = load ptr addrspace(1), ptr addrspace(1) %a
68  %ld.1 = load i64, ptr addrspace(1) %a.1
69
70  ret void
71}
72
; Stores a ptr addrspace(1) at %a and an i64 in the next 8-byte slot.
; Expected output: %ptr0 is converted with ptrtoint, inserted as lane 0 of a
; <2 x i64>, and the pair is written with a single <2 x i64> store.
73; CHECK-LABEL: @merge_store_ptr64_i64(
74; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
75; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
76; CHECK: store <2 x i64>
77define amdgpu_kernel void @merge_store_ptr64_i64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, i64 %val1) #0 {
78entry:
; %a.1 = %a + one i64 (8 bytes).
79  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
80
81
82  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
83  store i64 %val1, ptr addrspace(1) %a.1
84
85  ret void
86}
87
; Stores an i64 at %a and a ptr addrspace(1) in the next 8-byte slot.
; Expected output: %ptr1 is converted with ptrtoint, inserted as lane 1 of a
; <2 x i64>, and the pair is written with a single <2 x i64> store.
88; CHECK-LABEL: @merge_store_i64_ptr64(
89; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
90; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1
91; CHECK: store <2 x i64>
92define amdgpu_kernel void @merge_store_i64_ptr64(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(1) %ptr1) #0 {
93entry:
; %a.1 = %a + one 64-bit pointer (8 bytes, per p1:64:64).
94  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1
95
96  store i64 %val0, ptr addrspace(1) %a
97  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.1
98
99  ret void
100}
101
; Mixed element types of equal width: an i32 load at %a followed by a
; ptr addrspace(3) load from the next 4-byte slot.
; Expected output: one <2 x i32> load; lane 1 is extracted and converted to a
; pointer with inttoptr.
102; CHECK-LABEL: @merge_load_i32_ptr32(
103; CHECK: load <2 x i32>
104; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1
105; CHECK: inttoptr i32 [[ELT1]] to ptr addrspace(3)
106define amdgpu_kernel void @merge_load_i32_ptr32(ptr addrspace(3) nocapture %a) #0 {
107entry:
; %a.1 = %a + one i32 (4 bytes).
108  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1
109
110  %ld.0 = load i32, ptr addrspace(3) %a
111  %ld.1 = load ptr addrspace(3), ptr addrspace(3) %a.1
112
113  ret void
114}
115
; Mixed element types of equal width: a ptr addrspace(3) load at %a followed
; by an i32 load from the next 4-byte slot.
; Expected output: one <2 x i32> load; lane 0 is extracted and converted to a
; pointer with inttoptr.
116; CHECK-LABEL: @merge_load_ptr32_i32(
117; CHECK: load <2 x i32>
118; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0
119; CHECK: inttoptr i32 [[ELT0]] to ptr addrspace(3)
120define amdgpu_kernel void @merge_load_ptr32_i32(ptr addrspace(3) nocapture %a) #0 {
121entry:
; %a.1 = %a + one i32 (4 bytes).
122  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1
123
124  %ld.0 = load ptr addrspace(3), ptr addrspace(3) %a
125  %ld.1 = load i32, ptr addrspace(3) %a.1
126
127  ret void
128}
129
; Stores a ptr addrspace(3) at %a and an i32 in the next 4-byte slot.
; Expected output: %ptr0 is converted with ptrtoint, inserted as lane 0 of a
; <2 x i32>, and the pair is written with a single <2 x i32> store.
130; CHECK-LABEL: @merge_store_ptr32_i32(
131; CHECK: [[ELT0:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr0 to i32
132; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
133; CHECK: store <2 x i32>
134define amdgpu_kernel void @merge_store_ptr32_i32(ptr addrspace(3) nocapture %a, ptr addrspace(3) %ptr0, i32 %val1) #0 {
135entry:
; %a.1 = %a + one i32 (4 bytes).
136  %a.1 = getelementptr inbounds i32, ptr addrspace(3) %a, i32 1
137
138  store ptr addrspace(3) %ptr0, ptr addrspace(3) %a
139  store i32 %val1, ptr addrspace(3) %a.1
140
141  ret void
142}
143
; Stores an i32 at %a and a ptr addrspace(3) in the next 4-byte slot.
; Expected output: %ptr1 is converted with ptrtoint, inserted as lane 1 of a
; <2 x i32>, and the pair is written with a single <2 x i32> store.
144; CHECK-LABEL: @merge_store_i32_ptr32(
145; CHECK: [[ELT1:%[^ ]+]] = ptrtoint ptr addrspace(3) %ptr1 to i32
146; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1
147; CHECK: store <2 x i32>
148define amdgpu_kernel void @merge_store_i32_ptr32(ptr addrspace(3) nocapture %a, i32 %val0, ptr addrspace(3) %ptr1) #0 {
149entry:
; %a.1 = %a + one 32-bit pointer (4 bytes, per p3:32:32).
150  %a.1 = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %a, i32 1
151
152  store i32 %val0, ptr addrspace(3) %a
153  store ptr addrspace(3) %ptr1, ptr addrspace(3) %a.1
154
155  ret void
156}
157
; Negative test: stores a 32-bit ptr addrspace(3) at %a and an i64 at
; %a + 8 bytes, both through addrspace(1) pointers. The element widths differ
; (32 vs 64 bits) and bytes 4..7 are not written by either store.
; Expected output: both scalar stores remain, in their original order.
158; CHECK-LABEL: @no_merge_store_ptr32_i64(
159; CHECK: store ptr addrspace(3)
160; CHECK: store i64
161define amdgpu_kernel void @no_merge_store_ptr32_i64(ptr addrspace(1) nocapture %a, ptr addrspace(3) %ptr0, i64 %val1) #0 {
162entry:
; %a.1 = %a + one i64 (8 bytes).
163  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
164
165
166  store ptr addrspace(3) %ptr0, ptr addrspace(1) %a
167  store i64 %val1, ptr addrspace(1) %a.1
168
169  ret void
170}
171
; Negative test: stores an i64 at %a and a 32-bit ptr addrspace(3) in the slot
; immediately after it (%a + 8 bytes), both through addrspace(1) pointers.
; The element widths differ (64 vs 32 bits).
; Expected output: both scalar stores remain, in their original order.
172; CHECK-LABEL: @no_merge_store_i64_ptr32(
173; CHECK: store i64
174; CHECK: store ptr addrspace(3)
175define amdgpu_kernel void @no_merge_store_i64_ptr32(ptr addrspace(1) nocapture %a, i64 %val0, ptr addrspace(3) %ptr1) #0 {
176entry:
; Index by i64 (8 bytes) so the pointer store follows the i64 store. Indexing
; by the 4-byte ptr addrspace(3) type would place it at %a + 4, overlapping the
; upper half of the i64 store, which is not the case this test is named for.
177  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
178
179  store i64 %val0, ptr addrspace(1) %a
180  store ptr addrspace(3) %ptr1, ptr addrspace(1) %a.1
181
182  ret void
183}
184
; Negative test: loads an i64 at %a and a 32-bit ptr addrspace(3) from
; %a + 8 bytes, both through addrspace(1) pointers. The element widths differ
; (64 vs 32 bits).
; Expected output: both scalar loads remain, in their original order.
185; CHECK-LABEL: @no_merge_load_i64_ptr32(
186; CHECK: load i64,
187; CHECK: load ptr addrspace(3),
188define amdgpu_kernel void @no_merge_load_i64_ptr32(ptr addrspace(1) nocapture %a) #0 {
189entry:
; %a.1 = %a + one i64 (8 bytes).
190  %a.1 = getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
191
192  %ld.0 = load i64, ptr addrspace(1) %a
193  %ld.1 = load ptr addrspace(3), ptr addrspace(1) %a.1
194
195  ret void
196}
197
; Negative test: loads a 32-bit ptr addrspace(3) at %a and an i64 from
; %a + 8 bytes, both through addrspace(1) pointers. The element widths differ
; (32 vs 64 bits) and bytes 4..7 are not read by either load.
; Expected output: both scalar loads remain, in their original order.
198; CHECK-LABEL: @no_merge_load_ptr32_i64(
199; CHECK: load ptr addrspace(3),
200; CHECK: load i64,
201define amdgpu_kernel void @no_merge_load_ptr32_i64(ptr addrspace(1) nocapture %a) #0 {
202entry:
; %a.1 = %a + one i64 (8 bytes).
203  %a.1 =  getelementptr inbounds i64, ptr addrspace(1) %a, i64 1
204
205  %ld.0 = load ptr addrspace(3), ptr addrspace(1) %a
206  %ld.1 = load i64, ptr addrspace(1) %a.1
207
208  ret void
209}
210
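211; XXX - The adjacent <2 x ptr addrspace(1)> accesses in this function are not merged into a wider vector for some reason; the checks below pin the current, unmerged output.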
; Inputs that are already vectors of pointers: two consecutive
; <2 x ptr addrspace(1)> loads from %b and two consecutive
; <2 x ptr addrspace(1)> zeroinitializer stores to %a, all with align 4.
; %a and %b are both marked noalias.
; Expected output: two loads and two stores of the original vector type,
; i.e. no wider access is formed.
212; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
213; CHECK: load <2 x ptr addrspace(1)>
214; CHECK: load <2 x ptr addrspace(1)>
215; CHECK: store <2 x ptr addrspace(1)>
216; CHECK: store <2 x ptr addrspace(1)>
217define amdgpu_kernel void @merge_v2p1i8_v2p1i8(ptr addrspace(1) nocapture noalias %a, ptr addrspace(1) nocapture readonly noalias %b) #0 {
218entry:
; %a.1 / %b.1 address the vector-sized slot immediately after %a / %b.
219  %a.1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %a, i64 1
220  %b.1 = getelementptr inbounds <2 x ptr addrspace(1)>, ptr addrspace(1) %b, i64 1
221
; The loaded values are never used; only the memory accesses matter here.
222  %ld.c = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b, align 4
223  %ld.c.idx.1 = load <2 x ptr addrspace(1)>, ptr addrspace(1) %b.1, align 4
224
225  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a, align 4
226  store <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %a.1, align 4
227  ret void
228}
229
; Pointer mixed with floating point: a ptr addrspace(1) load at %a followed by
; a double load from the next 8-byte slot.
; Expected output: one <2 x i64> load; lane 0 is converted to a pointer with
; inttoptr and lane 1 is converted to double with bitcast.
; NOTE(review): the FileCheck variable ELT0_INT captures the inttoptr result,
; which is a pointer rather than an integer; the name is misleading but the
; match itself is unaffected.
230; CHECK-LABEL: @merge_load_ptr64_f64(
231; CHECK: load <2 x i64>
232; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
233; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to ptr addrspace(1)
234; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
235; CHECK: bitcast i64 [[ELT1_INT]] to double
236define amdgpu_kernel void @merge_load_ptr64_f64(ptr addrspace(1) nocapture %a) #0 {
237entry:
; %a.1 = %a + one double (8 bytes).
238  %a.1 =  getelementptr inbounds double, ptr addrspace(1) %a, i64 1
239
240  %ld.0 = load ptr addrspace(1), ptr addrspace(1) %a
241  %ld.1 = load double, ptr addrspace(1) %a.1
242
243  ret void
244}
245
; Floating point mixed with pointer: a double load at %a followed by a
; ptr addrspace(1) load from the next 8-byte slot.
; Expected output: one <2 x i64> load; lane 0 is converted to double with
; bitcast and lane 1 is converted to a pointer with inttoptr.
246; CHECK-LABEL: @merge_load_f64_ptr64(
247; CHECK: load <2 x i64>
248; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
249; CHECK: bitcast i64 [[ELT0]] to double
250; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
251; CHECK: inttoptr i64 [[ELT1]] to ptr addrspace(1)
252define amdgpu_kernel void @merge_load_f64_ptr64(ptr addrspace(1) nocapture %a) #0 {
253entry:
; %a.1 = %a + one double (8 bytes).
254  %a.1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1
255
256  %ld.0 = load double, ptr addrspace(1) %a
257  %ld.1 = load ptr addrspace(1), ptr addrspace(1) %a.1
258
259  ret void
260}
261
; Stores a ptr addrspace(1) at %a and a double in the next 8-byte slot.
; Expected output: %ptr0 goes through ptrtoint into lane 0 and %val1 goes
; through bitcast into lane 1 of a <2 x i64>, written with one vector store.
262; CHECK-LABEL: @merge_store_ptr64_f64(
263; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr0 to i64
264; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
265; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
266; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
267; CHECK: store <2 x i64>
268define amdgpu_kernel void @merge_store_ptr64_f64(ptr addrspace(1) nocapture %a, ptr addrspace(1) %ptr0, double %val1) #0 {
269entry:
; %a.1 = %a + one double (8 bytes).
270  %a.1 = getelementptr inbounds double, ptr addrspace(1) %a, i64 1
271
272  store ptr addrspace(1) %ptr0, ptr addrspace(1) %a
273  store double %val1, ptr addrspace(1) %a.1
274
275  ret void
276}
277
; Stores a double at %a and a ptr addrspace(1) in the next 8-byte slot.
; Expected output: %val0 goes through bitcast into lane 0 and %ptr1 goes
; through ptrtoint into lane 1 of a <2 x i64>, written with one vector store.
278; CHECK-LABEL: @merge_store_f64_ptr64(
279; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
280; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
281; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint ptr addrspace(1) %ptr1 to i64
282; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
283; CHECK: store <2 x i64>
284define amdgpu_kernel void @merge_store_f64_ptr64(ptr addrspace(1) nocapture %a, double %val0, ptr addrspace(1) %ptr1) #0 {
285entry:
; %a.1 = %a + one 64-bit pointer (8 bytes, per p1:64:64).
286  %a.1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %a, i64 1
287
288  store double %val0, ptr addrspace(1) %a
289  store ptr addrspace(1) %ptr1, ptr addrspace(1) %a.1
290
291  ret void
292}
293
; Attribute groups: #0 is attached to every kernel definition in this file,
; #1 to the llvm.amdgcn.workitem.id.x declaration.
294attributes #0 = { nounwind }
295attributes #1 = { nounwind readnone }
296