xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll (revision a3938700856f41086a01c4b8234f9ed574be57f8)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,ALIGNED %s
3; RUN: opt -S -passes=load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,UNALIGNED %s
4; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,ALIGNED %s
5; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,UNALIGNED %s
6
7target triple = "amdgcn--"
8target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
9
; Two adjacent i8 loads at an unknown offset into an align-1 alloca.
; ALIGNED: stays scalar. UNALIGNED: merged into one <2 x i8> load at align 1.
10define amdgpu_kernel void @load_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
11; ALIGNED-LABEL: @load_unknown_offset_align1_i8(
12; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
13; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
14; ALIGNED-NEXT:    [[VAL0:%.*]] = load i8, ptr addrspace(5) [[PTR0]], align 1
15; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
16; ALIGNED-NEXT:    [[VAL1:%.*]] = load i8, ptr addrspace(5) [[PTR1]], align 1
17; ALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL0]], [[VAL1]]
18; ALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
19; ALIGNED-NEXT:    ret void
20;
21; UNALIGNED-LABEL: @load_unknown_offset_align1_i8(
22; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
23; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
24; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr addrspace(5) [[PTR0]], align 1
25; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
26; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
27; UNALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL01]], [[VAL12]]
28; UNALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
29; UNALIGNED-NEXT:    ret void
30;
31  %alloca = alloca [128 x i8], align 1, addrspace(5)
32  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
33  %val0 = load i8, ptr addrspace(5) %ptr0, align 1
34  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
35  %val1 = load i8, ptr addrspace(5) %ptr1, align 1
36  %add = add i8 %val0, %val1
37  store i8 %add, ptr addrspace(1) %out
38  ret void
39}
40
; Same pattern with i16 elements: ALIGNED keeps the two scalar align-1 loads,
; UNALIGNED merges them into one <2 x i16> load at align 1.
41define amdgpu_kernel void @load_unknown_offset_align1_i16(ptr addrspace(1) noalias %out, i32 %offset) #0 {
42; ALIGNED-LABEL: @load_unknown_offset_align1_i16(
43; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
44; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
45; ALIGNED-NEXT:    [[VAL0:%.*]] = load i16, ptr addrspace(5) [[PTR0]], align 1
46; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[PTR0]], i32 1
47; ALIGNED-NEXT:    [[VAL1:%.*]] = load i16, ptr addrspace(5) [[PTR1]], align 1
48; ALIGNED-NEXT:    [[ADD:%.*]] = add i16 [[VAL0]], [[VAL1]]
49; ALIGNED-NEXT:    store i16 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 2
50; ALIGNED-NEXT:    ret void
51;
52; UNALIGNED-LABEL: @load_unknown_offset_align1_i16(
53; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
54; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
55; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[PTR0]], align 1
56; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
57; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
58; UNALIGNED-NEXT:    [[ADD:%.*]] = add i16 [[VAL01]], [[VAL12]]
59; UNALIGNED-NEXT:    store i16 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 2
60; UNALIGNED-NEXT:    ret void
61;
62  %alloca = alloca [128 x i16], align 1, addrspace(5)
63  %ptr0 = getelementptr inbounds [128 x i16], ptr addrspace(5) %alloca, i32 0, i32 %offset
64  %val0 = load i16, ptr addrspace(5) %ptr0, align 1
65  %ptr1 = getelementptr inbounds i16, ptr addrspace(5) %ptr0, i32 1
66  %val1 = load i16, ptr addrspace(5) %ptr1, align 1
67  %add = add i16 %val0, %val1
68  store i16 %add, ptr addrspace(1) %out
69  ret void
70}
71
72; FIXME: Although the offset is unknown here, we know it is a multiple
73; of the element size, so should still be align 4
; i32 variant: ALIGNED keeps the scalar align-1 loads; UNALIGNED vectorizes to a
; <2 x i32> load but (per the FIXME above) still only at align 1.
74define amdgpu_kernel void @load_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
75; ALIGNED-LABEL: @load_unknown_offset_align1_i32(
76; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
77; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
78; ALIGNED-NEXT:    [[VAL0:%.*]] = load i32, ptr addrspace(5) [[PTR0]], align 1
79; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
80; ALIGNED-NEXT:    [[VAL1:%.*]] = load i32, ptr addrspace(5) [[PTR1]], align 1
81; ALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL0]], [[VAL1]]
82; ALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
83; ALIGNED-NEXT:    ret void
84;
85; UNALIGNED-LABEL: @load_unknown_offset_align1_i32(
86; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
87; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
88; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[PTR0]], align 1
89; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
90; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
91; UNALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL01]], [[VAL12]]
92; UNALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
93; UNALIGNED-NEXT:    ret void
94;
95  %alloca = alloca [128 x i32], align 1, addrspace(5)
96  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
97  %val0 = load i32, ptr addrspace(5) %ptr0, align 1
98  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
99  %val1 = load i32, ptr addrspace(5) %ptr1, align 1
100  %add = add i32 %val0, %val1
101  store i32 %add, ptr addrspace(1) %out
102  ret void
103}
104
105; Make sure alloca alignment isn't decreased
; With an align-16 alloca, the alloca's alignment must be kept at 16 in both
; outputs; the UNALIGNED <2 x i32> load is emitted at align 4 (the GEP indexes
; i32 elements, so the unknown offset is presumably a multiple of 4 — confirm).
106define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
107; ALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i32(
108; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
109; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
110; ALIGNED-NEXT:    [[VAL0:%.*]] = load i32, ptr addrspace(5) [[PTR0]], align 1
111; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
112; ALIGNED-NEXT:    [[VAL1:%.*]] = load i32, ptr addrspace(5) [[PTR1]], align 1
113; ALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL0]], [[VAL1]]
114; ALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
115; ALIGNED-NEXT:    ret void
116;
117; UNALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i32(
118; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
119; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
120; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[PTR0]], align 4
121; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
122; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
123; UNALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL01]], [[VAL12]]
124; UNALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
125; UNALIGNED-NEXT:    ret void
126;
127  %alloca = alloca [128 x i32], align 16, addrspace(5)
128  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
129  %val0 = load i32, ptr addrspace(5) %ptr0, align 1
130  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
131  %val1 = load i32, ptr addrspace(5) %ptr1, align 1
132  %add = add i32 %val0, %val1
133  store i32 %add, ptr addrspace(1) %out
134  ret void
135}
136
; Store counterpart of the i8 case: ALIGNED keeps two scalar i8 stores,
; UNALIGNED merges them into a single <2 x i8> store at align 1.
137define amdgpu_kernel void @store_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
138; ALIGNED-LABEL: @store_unknown_offset_align1_i8(
139; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
140; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
141; ALIGNED-NEXT:    store i8 9, ptr addrspace(5) [[PTR0]], align 1
142; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
143; ALIGNED-NEXT:    store i8 10, ptr addrspace(5) [[PTR1]], align 1
144; ALIGNED-NEXT:    ret void
145;
146; UNALIGNED-LABEL: @store_unknown_offset_align1_i8(
147; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
148; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
149; UNALIGNED-NEXT:    store <2 x i8> <i8 9, i8 10>, ptr addrspace(5) [[PTR0]], align 1
150; UNALIGNED-NEXT:    ret void
151;
152  %alloca = alloca [128 x i8], align 1, addrspace(5)
153  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
154  store i8 9, ptr addrspace(5) %ptr0, align 1
155  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
156  store i8 10, ptr addrspace(5) %ptr1, align 1
157  ret void
158}
159
; i16 store variant: ALIGNED keeps two scalar i16 stores, UNALIGNED merges them
; into a single <2 x i16> store at align 1.
160define amdgpu_kernel void @store_unknown_offset_align1_i16(ptr addrspace(1) noalias %out, i32 %offset) #0 {
161; ALIGNED-LABEL: @store_unknown_offset_align1_i16(
162; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
163; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
164; ALIGNED-NEXT:    store i16 9, ptr addrspace(5) [[PTR0]], align 1
165; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[PTR0]], i32 1
166; ALIGNED-NEXT:    store i16 10, ptr addrspace(5) [[PTR1]], align 1
167; ALIGNED-NEXT:    ret void
168;
169; UNALIGNED-LABEL: @store_unknown_offset_align1_i16(
170; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
171; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
172; UNALIGNED-NEXT:    store <2 x i16> <i16 9, i16 10>, ptr addrspace(5) [[PTR0]], align 1
173; UNALIGNED-NEXT:    ret void
174;
175  %alloca = alloca [128 x i16], align 1, addrspace(5)
176  %ptr0 = getelementptr inbounds [128 x i16], ptr addrspace(5) %alloca, i32 0, i32 %offset
177  store i16 9, ptr addrspace(5) %ptr0, align 1
178  %ptr1 = getelementptr inbounds i16, ptr addrspace(5) %ptr0, i32 1
179  store i16 10, ptr addrspace(5) %ptr1, align 1
180  ret void
181}
182
183; FIXME: Although the offset is unknown here, we know it is a multiple
184; of the element size, so it still should be align 4.
185
; i32 store variant: ALIGNED keeps the scalar align-1 stores; UNALIGNED merges
; into a <2 x i32> store but (per the FIXME above) still only at align 1.
186define amdgpu_kernel void @store_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
187; ALIGNED-LABEL: @store_unknown_offset_align1_i32(
188; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
189; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
190; ALIGNED-NEXT:    store i32 9, ptr addrspace(5) [[PTR0]], align 1
191; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
192; ALIGNED-NEXT:    store i32 10, ptr addrspace(5) [[PTR1]], align 1
193; ALIGNED-NEXT:    ret void
194;
195; UNALIGNED-LABEL: @store_unknown_offset_align1_i32(
196; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
197; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
198; UNALIGNED-NEXT:    store <2 x i32> <i32 9, i32 10>, ptr addrspace(5) [[PTR0]], align 1
199; UNALIGNED-NEXT:    ret void
200;
201  %alloca = alloca [128 x i32], align 1, addrspace(5)
202  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
203  store i32 9, ptr addrspace(5) %ptr0, align 1
204  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
205  store i32 10, ptr addrspace(5) %ptr1, align 1
206  ret void
207}
208
; Four consecutive i32 stores at known offsets from the alloca base merge into
; one <4 x i32> store under both prefixes; the align-1 alloca is promoted to
; align 4 to permit the vector access.
209define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32() {
210; CHECK-LABEL: @merge_private_store_4_vector_elts_loads_v4i32(
211; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i32], align 4, addrspace(5)
212; CHECK-NEXT:    store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, ptr addrspace(5) [[ALLOCA]], align 4
213; CHECK-NEXT:    ret void
214;
215  %alloca = alloca [8 x i32], align 1, addrspace(5)
216  %out.gep.1 = getelementptr i32, ptr addrspace(5) %alloca, i32 1
217  %out.gep.2 = getelementptr i32, ptr addrspace(5) %alloca, i32 2
218  %out.gep.3 = getelementptr i32, ptr addrspace(5) %alloca, i32 3
219
220  store i32 9, ptr addrspace(5) %alloca, align 1
221  store i32 1, ptr addrspace(5) %out.gep.1, align 1
222  store i32 23, ptr addrspace(5) %out.gep.2, align 1
223  store i32 19, ptr addrspace(5) %out.gep.3, align 1
224  ret void
225}
226
; Same merge for i8 elements: four stores become one <4 x i8> store, and the
; align-1 alloca is promoted to align 4.
227define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8() {
228; CHECK-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
229; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i8], align 4, addrspace(5)
230; CHECK-NEXT:    store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, ptr addrspace(5) [[ALLOCA]], align 4
231; CHECK-NEXT:    ret void
232;
233  %alloca = alloca [8 x i8], align 1, addrspace(5)
234  %out.gep.1 = getelementptr i8, ptr addrspace(5) %alloca, i8 1
235  %out.gep.2 = getelementptr i8, ptr addrspace(5) %alloca, i8 2
236  %out.gep.3 = getelementptr i8, ptr addrspace(5) %alloca, i8 3
237
238  store i8 9, ptr addrspace(5) %alloca, align 1
239  store i8 1, ptr addrspace(5) %out.gep.1, align 1
240  store i8 23, ptr addrspace(5) %out.gep.2, align 1
241  store i8 19, ptr addrspace(5) %out.gep.3, align 1
242  ret void
243}
244
; Load counterpart: four consecutive i32 loads merge into one <4 x i32> load
; plus extractelements; the align-1 alloca is promoted to align 4.
245define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i32() {
246; CHECK-LABEL: @merge_private_load_4_vector_elts_loads_v4i32(
247; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i32], align 4, addrspace(5)
248; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(5) [[ALLOCA]], align 4
249; CHECK-NEXT:    [[LOAD01:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
250; CHECK-NEXT:    [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
251; CHECK-NEXT:    [[LOAD23:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
252; CHECK-NEXT:    [[LOAD34:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
253; CHECK-NEXT:    ret void
254;
255  %alloca = alloca [8 x i32], align 1, addrspace(5)
256  %out.gep.1 = getelementptr i32, ptr addrspace(5) %alloca, i32 1
257  %out.gep.2 = getelementptr i32, ptr addrspace(5) %alloca, i32 2
258  %out.gep.3 = getelementptr i32, ptr addrspace(5) %alloca, i32 3
259
260  %load0 = load i32, ptr addrspace(5) %alloca, align 1
261  %load1 = load i32, ptr addrspace(5) %out.gep.1, align 1
262  %load2 = load i32, ptr addrspace(5) %out.gep.2, align 1
263  %load3 = load i32, ptr addrspace(5) %out.gep.3, align 1
264  ret void
265}
266
; Same load merge for i8 elements: one <4 x i8> load plus extractelements, with
; the alloca promoted from align 1 to align 4.
267define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i8() {
268; CHECK-LABEL: @merge_private_load_4_vector_elts_loads_v4i8(
269; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i8], align 4, addrspace(5)
270; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr addrspace(5) [[ALLOCA]], align 4
271; CHECK-NEXT:    [[LOAD01:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
272; CHECK-NEXT:    [[LOAD12:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
273; CHECK-NEXT:    [[LOAD23:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
274; CHECK-NEXT:    [[LOAD34:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
275; CHECK-NEXT:    ret void
276;
277  %alloca = alloca [8 x i8], align 1, addrspace(5)
278  %out.gep.1 = getelementptr i8, ptr addrspace(5) %alloca, i8 1
279  %out.gep.2 = getelementptr i8, ptr addrspace(5) %alloca, i8 2
280  %out.gep.3 = getelementptr i8, ptr addrspace(5) %alloca, i8 3
281
282  %load0 = load i8, ptr addrspace(5) %alloca, align 1
283  %load1 = load i8, ptr addrspace(5) %out.gep.1, align 1
284  %load2 = load i8, ptr addrspace(5) %out.gep.2, align 1
285  %load3 = load i8, ptr addrspace(5) %out.gep.3, align 1
286  ret void
287}
288
289; Make sure we don't think the alignment will increase if the base address isn't an alloca
; Base pointer is a function argument, not an alloca, so its alignment cannot be
; raised: ALIGNED keeps the two align-2 i16 stores scalar, while UNALIGNED
; merges them into a <2 x i16> store at align 2.
290define void @private_store_2xi16_align2_not_alloca(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
291; ALIGNED-LABEL: @private_store_2xi16_align2_not_alloca(
292; ALIGNED-NEXT:    [[GEP_R:%.*]] = getelementptr i16, ptr addrspace(5) [[R:%.*]], i32 1
293; ALIGNED-NEXT:    store i16 1, ptr addrspace(5) [[R]], align 2
294; ALIGNED-NEXT:    store i16 2, ptr addrspace(5) [[GEP_R]], align 2
295; ALIGNED-NEXT:    ret void
296;
297; UNALIGNED-LABEL: @private_store_2xi16_align2_not_alloca(
298; UNALIGNED-NEXT:    store <2 x i16> <i16 1, i16 2>, ptr addrspace(5) [[R:%.*]], align 2
299; UNALIGNED-NEXT:    ret void
300;
301  %gep.r = getelementptr i16, ptr addrspace(5) %r, i32 1
302  store i16 1, ptr addrspace(5) %r, align 2
303  store i16 2, ptr addrspace(5) %gep.r, align 2
304  ret void
305}
306
; As above but starting from align 1: ALIGNED stays scalar; UNALIGNED merges
; into a <2 x i16> store at align 1.
307define void @private_store_2xi16_align1_not_alloca(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
308; ALIGNED-LABEL: @private_store_2xi16_align1_not_alloca(
309; ALIGNED-NEXT:    [[GEP_R:%.*]] = getelementptr i16, ptr addrspace(5) [[R:%.*]], i32 1
310; ALIGNED-NEXT:    store i16 1, ptr addrspace(5) [[R]], align 1
311; ALIGNED-NEXT:    store i16 2, ptr addrspace(5) [[GEP_R]], align 1
312; ALIGNED-NEXT:    ret void
313;
314; UNALIGNED-LABEL: @private_store_2xi16_align1_not_alloca(
315; UNALIGNED-NEXT:    store <2 x i16> <i16 1, i16 2>, ptr addrspace(5) [[R:%.*]], align 1
316; UNALIGNED-NEXT:    ret void
317;
318  %gep.r = getelementptr i16, ptr addrspace(5) %r, i32 1
319  store i16 1, ptr addrspace(5) %r, align 1
320  store i16 2, ptr addrspace(5) %gep.r, align 1
321  ret void
322}
323
; Load version with a non-alloca base at align 2: ALIGNED keeps the two scalar
; i16 loads; UNALIGNED merges them into a <2 x i16> load at align 2. The
; zext/shl/or recombination of the two halves is unchanged in both cases.
324define i32 @private_load_2xi16_align2_not_alloca(ptr addrspace(5) %p) #0 {
325; ALIGNED-LABEL: @private_load_2xi16_align2_not_alloca(
326; ALIGNED-NEXT:    [[GEP_P:%.*]] = getelementptr i16, ptr addrspace(5) [[P:%.*]], i64 1
327; ALIGNED-NEXT:    [[P_0:%.*]] = load i16, ptr addrspace(5) [[P]], align 2
328; ALIGNED-NEXT:    [[P_1:%.*]] = load i16, ptr addrspace(5) [[GEP_P]], align 2
329; ALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_0]] to i32
330; ALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_1]] to i32
331; ALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
332; ALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
333; ALIGNED-NEXT:    ret i32 [[OR]]
334;
335; UNALIGNED-LABEL: @private_load_2xi16_align2_not_alloca(
336; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[P:%.*]], align 2
337; UNALIGNED-NEXT:    [[P_01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
338; UNALIGNED-NEXT:    [[P_12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
339; UNALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_01]] to i32
340; UNALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_12]] to i32
341; UNALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
342; UNALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
343; UNALIGNED-NEXT:    ret i32 [[OR]]
344;
345  %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1
346  %p.0 = load i16, ptr addrspace(5) %p, align 2
347  %p.1 = load i16, ptr addrspace(5) %gep.p, align 2
348  %zext.0 = zext i16 %p.0 to i32
349  %zext.1 = zext i16 %p.1 to i32
350  %shl.1 = shl i32 %zext.1, 16
351  %or = or i32 %zext.0, %shl.1
352  ret i32 %or
353}
354
; As above but at align 1: ALIGNED keeps both scalar loads; UNALIGNED merges
; them into a <2 x i16> load at align 1.
355define i32 @private_load_2xi16_align1_not_alloca(ptr addrspace(5) %p) #0 {
356; ALIGNED-LABEL: @private_load_2xi16_align1_not_alloca(
357; ALIGNED-NEXT:    [[GEP_P:%.*]] = getelementptr i16, ptr addrspace(5) [[P:%.*]], i64 1
358; ALIGNED-NEXT:    [[P_0:%.*]] = load i16, ptr addrspace(5) [[P]], align 1
359; ALIGNED-NEXT:    [[P_1:%.*]] = load i16, ptr addrspace(5) [[GEP_P]], align 1
360; ALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_0]] to i32
361; ALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_1]] to i32
362; ALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
363; ALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
364; ALIGNED-NEXT:    ret i32 [[OR]]
365;
366; UNALIGNED-LABEL: @private_load_2xi16_align1_not_alloca(
367; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[P:%.*]], align 1
368; UNALIGNED-NEXT:    [[P_01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
369; UNALIGNED-NEXT:    [[P_12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
370; UNALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_01]] to i32
371; UNALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_12]] to i32
372; UNALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
373; UNALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
374; UNALIGNED-NEXT:    ret i32 [[OR]]
375;
376  %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1
377  %p.0 = load i16, ptr addrspace(5) %p, align 1
378  %p.1 = load i16, ptr addrspace(5) %gep.p, align 1
379  %zext.0 = zext i16 %p.0 to i32
380  %zext.1 = zext i16 %p.1 to i32
381  %shl.1 = shl i32 %zext.1, 16
382  %or = or i32 %zext.0, %shl.1
383  ret i32 %or
384}
385
; align-16 alloca with i8 elements at an unknown offset: the offset gives no
; extra alignment for byte-sized elements, so the UNALIGNED <2 x i8> load stays
; at align 1; the alloca's align 16 is preserved in both outputs.
386define void @load_alloca16_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
387; ALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i8(
388; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 16, addrspace(5)
389; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
390; ALIGNED-NEXT:    [[VAL0:%.*]] = load i8, ptr addrspace(5) [[PTR0]], align 1
391; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
392; ALIGNED-NEXT:    [[VAL1:%.*]] = load i8, ptr addrspace(5) [[PTR1]], align 1
393; ALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL0]], [[VAL1]]
394; ALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
395; ALIGNED-NEXT:    ret void
396;
397; UNALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i8(
398; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 16, addrspace(5)
399; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
400; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr addrspace(5) [[PTR0]], align 1
401; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
402; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
403; UNALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL01]], [[VAL12]]
404; UNALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
405; UNALIGNED-NEXT:    ret void
406;
407  %alloca = alloca [128 x i8], align 16, addrspace(5)
408  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
409  %val0 = load i8, ptr addrspace(5) %ptr0, align 1
410  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
411  %val1 = load i8, ptr addrspace(5) %ptr1, align 1
412  %add = add i8 %val0, %val1
413  store i8 %add, ptr addrspace(1) %out
414  ret void
415}
416
; Store version with an align-16 alloca and i32 elements: ALIGNED keeps the
; scalar align-1 stores; UNALIGNED emits a <2 x i32> store at align 4 (the GEP
; indexes i32 elements off an align-16 base), keeping the alloca at align 16.
417define void @store_alloca16_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
418; ALIGNED-LABEL: @store_alloca16_unknown_offset_align1_i32(
419; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
420; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
421; ALIGNED-NEXT:    store i32 9, ptr addrspace(5) [[PTR0]], align 1
422; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
423; ALIGNED-NEXT:    store i32 10, ptr addrspace(5) [[PTR1]], align 1
424; ALIGNED-NEXT:    ret void
425;
426; UNALIGNED-LABEL: @store_alloca16_unknown_offset_align1_i32(
427; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
428; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
429; UNALIGNED-NEXT:    store <2 x i32> <i32 9, i32 10>, ptr addrspace(5) [[PTR0]], align 4
430; UNALIGNED-NEXT:    ret void
431;
432  %alloca = alloca [128 x i32], align 16, addrspace(5)
433  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
434  store i32 9, ptr addrspace(5) %ptr0, align 1
435  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
436  store i32 10, ptr addrspace(5) %ptr1, align 1
437  ret void
438}
439
440attributes #0 = { nounwind }
441