xref: /llvm-project/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll (revision 1578c670ff3c0058ac6043e55e40d37d260937bd)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
3; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
4; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
5; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
6; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
7; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
8; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
9; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
10
; Fill a 1-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
11define void @load_1byte_chunk_of_1byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
12; CHECK-ALL-LABEL: @load_1byte_chunk_of_1byte_alloca(
13; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [1 x i8], align 64
14; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <1 x i8>, ptr [[SRC:%.*]], align 1
15; CHECK-ALL-NEXT:    store <1 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
16; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
17; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
18; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
19; CHECK-ALL-NEXT:    ret void
20;
21  %intermediate = alloca [1 x i8], align 64
22  %init = load <1 x i8>, ptr %src, align 1
23  store <1 x i8> %init, ptr %intermediate, align 64
24  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
25  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
26  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
27  ret void
28}
29
; Fill a 2-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 1].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
30define void @load_1byte_chunk_of_2byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
31; CHECK-ALL-LABEL: @load_1byte_chunk_of_2byte_alloca(
32; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
33; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
34; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
35; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
36; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
37; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
38; CHECK-ALL-NEXT:    ret void
39;
40  %intermediate = alloca [2 x i8], align 64
41  %init = load <2 x i8>, ptr %src, align 1
42  store <2 x i8> %init, ptr %intermediate, align 64
43  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
44  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
45  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
46  ret void
47}
48
; Fill a 2-byte alloca from %src, then load a 2-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
49define void @load_2byte_chunk_of_2byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
50; CHECK-ALL-LABEL: @load_2byte_chunk_of_2byte_alloca(
51; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
52; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
53; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
54; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
55; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
56; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
57; CHECK-ALL-NEXT:    ret void
58;
59  %intermediate = alloca [2 x i8], align 64
60  %init = load <2 x i8>, ptr %src, align 1
61  store <2 x i8> %init, ptr %intermediate, align 64
62  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
63  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
64  store <2 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 2`
65  ret void
66}
67
; Fill a 4-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 3].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
68define void @load_1byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
69; CHECK-ALL-LABEL: @load_1byte_chunk_of_4byte_alloca(
70; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
71; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
72; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
73; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
74; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
75; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
76; CHECK-ALL-NEXT:    ret void
77;
78  %intermediate = alloca [4 x i8], align 64
79  %init = load <4 x i8>, ptr %src, align 1
80  store <4 x i8> %init, ptr %intermediate, align 64
81  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
82  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
83  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
84  ret void
85}
86
; Fill a 4-byte alloca from %src, then load a 2-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 2].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
87define void @load_2byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
88; CHECK-ALL-LABEL: @load_2byte_chunk_of_4byte_alloca(
89; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
90; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
91; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
92; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
93; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
94; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
95; CHECK-ALL-NEXT:    ret void
96;
97  %intermediate = alloca [4 x i8], align 64
98  %init = load <4 x i8>, ptr %src, align 1
99  store <4 x i8> %init, ptr %intermediate, align 64
100  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
101  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
102  store <2 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 2`
103  ret void
104}
105
; Fill a 4-byte alloca from %src, then load a 4-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
106define void @load_4byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
107; CHECK-ALL-LABEL: @load_4byte_chunk_of_4byte_alloca(
108; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
109; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
110; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
111; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
112; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
113; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
114; CHECK-ALL-NEXT:    ret void
115;
116  %intermediate = alloca [4 x i8], align 64
117  %init = load <4 x i8>, ptr %src, align 1
118  store <4 x i8> %init, ptr %intermediate, align 64
119  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
120  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
121  store <4 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 4`
122  ret void
123}
124
; Fill an 8-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 7].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
125define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
126; CHECK-ALL-LABEL: @load_1byte_chunk_of_8byte_alloca(
127; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
128; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
129; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
130; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
131; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
132; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
133; CHECK-ALL-NEXT:    ret void
134;
135  %intermediate = alloca [8 x i8], align 64
136  %init = load <8 x i8>, ptr %src, align 1
137  store <8 x i8> %init, ptr %intermediate, align 64
138  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
139  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
140  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
141  ret void
142}
143
; Fill an 8-byte alloca from %src, then load a 2-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 6].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
144define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
145; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca(
146; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
147; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
148; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
149; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
150; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
151; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
152; CHECK-ALL-NEXT:    ret void
153;
154  %intermediate = alloca [8 x i8], align 64
155  %init = load <8 x i8>, ptr %src, align 1
156  store <8 x i8> %init, ptr %intermediate, align 64
157  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
158  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
159  store <2 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 2`
160  ret void
161}
162
; Fill an 8-byte alloca from %src, then load a 4-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 4].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
163define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
164; CHECK-ALL-LABEL: @load_4byte_chunk_of_8byte_alloca(
165; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
166; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
167; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
168; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
169; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
170; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
171; CHECK-ALL-NEXT:    ret void
172;
173  %intermediate = alloca [8 x i8], align 64
174  %init = load <8 x i8>, ptr %src, align 1
175  store <8 x i8> %init, ptr %intermediate, align 64
176  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
177  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
178  store <4 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 4`
179  ret void
180}
181
; Fill an 8-byte alloca from %src, then load an 8-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
182define void @load_8byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
183; CHECK-ALL-LABEL: @load_8byte_chunk_of_8byte_alloca(
184; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
185; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
186; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
187; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
188; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
189; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
190; CHECK-ALL-NEXT:    ret void
191;
192  %intermediate = alloca [8 x i8], align 64
193  %init = load <8 x i8>, ptr %src, align 1
194  store <8 x i8> %init, ptr %intermediate, align 64
195  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
196  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
197  store <8 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 8`
198  ret void
199}
200
; Fill a 16-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 15].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
201define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
202; CHECK-ALL-LABEL: @load_1byte_chunk_of_16byte_alloca(
203; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
204; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
205; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
206; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
207; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
208; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
209; CHECK-ALL-NEXT:    ret void
210;
211  %intermediate = alloca [16 x i8], align 64
212  %init = load <16 x i8>, ptr %src, align 1
213  store <16 x i8> %init, ptr %intermediate, align 64
214  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
215  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
216  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
217  ret void
218}
219
; Fill a 16-byte alloca from %src, then load a 2-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 14].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
220define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
221; CHECK-ALL-LABEL: @load_2byte_chunk_of_16byte_alloca(
222; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
223; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
224; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
225; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
226; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
227; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
228; CHECK-ALL-NEXT:    ret void
229;
230  %intermediate = alloca [16 x i8], align 64
231  %init = load <16 x i8>, ptr %src, align 1
232  store <16 x i8> %init, ptr %intermediate, align 64
233  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
234  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
235  store <2 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 2`
236  ret void
237}
238
; Fill a 16-byte alloca from %src, then load a 4-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 12].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
239define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
240; CHECK-ALL-LABEL: @load_4byte_chunk_of_16byte_alloca(
241; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
242; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
243; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
244; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
245; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
246; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
247; CHECK-ALL-NEXT:    ret void
248;
249  %intermediate = alloca [16 x i8], align 64
250  %init = load <16 x i8>, ptr %src, align 1
251  store <16 x i8> %init, ptr %intermediate, align 64
252  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
253  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
254  store <4 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 4`
255  ret void
256}
257
; Fill a 16-byte alloca from %src, then load an 8-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 8].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
258define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
259; CHECK-ALL-LABEL: @load_8byte_chunk_of_16byte_alloca(
260; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
261; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
262; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
263; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
264; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
265; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
266; CHECK-ALL-NEXT:    ret void
267;
268  %intermediate = alloca [16 x i8], align 64
269  %init = load <16 x i8>, ptr %src, align 1
270  store <16 x i8> %init, ptr %intermediate, align 64
271  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
272  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
273  store <8 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 8`
274  ret void
275}
276
; Fill a 16-byte alloca from %src, then load a 16-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
277define void @load_16byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
278; CHECK-ALL-LABEL: @load_16byte_chunk_of_16byte_alloca(
279; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
280; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
281; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
282; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
283; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
284; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
285; CHECK-ALL-NEXT:    ret void
286;
287  %intermediate = alloca [16 x i8], align 64
288  %init = load <16 x i8>, ptr %src, align 1
289  store <16 x i8> %init, ptr %intermediate, align 64
290  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
291  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1
292  store <16 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 16`
293  ret void
294}
295
; Fill a 32-byte alloca from %src, then load a 1-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 31].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
296define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
297; CHECK-ALL-LABEL: @load_1byte_chunk_of_32byte_alloca(
298; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
299; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
300; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
301; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
302; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
303; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
304; CHECK-ALL-NEXT:    ret void
305;
306  %intermediate = alloca [32 x i8], align 64
307  %init = load <32 x i8>, ptr %src, align 1
308  store <32 x i8> %init, ptr %intermediate, align 64
309  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
310  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
311  store <1 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 1`
312  ret void
313}
314
; Fill a 32-byte alloca from %src, then load a 2-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 30].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
315define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
316; CHECK-ALL-LABEL: @load_2byte_chunk_of_32byte_alloca(
317; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
318; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
319; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
320; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
321; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
322; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
323; CHECK-ALL-NEXT:    ret void
324;
325  %intermediate = alloca [32 x i8], align 64
326  %init = load <32 x i8>, ptr %src, align 1
327  store <32 x i8> %init, ptr %intermediate, align 64
328  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
329  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
330  store <2 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 2`
331  ret void
332}
333
; Fill a 32-byte alloca from %src, then load a 4-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 28].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
334define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
335; CHECK-ALL-LABEL: @load_4byte_chunk_of_32byte_alloca(
336; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
337; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
338; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
339; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
340; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
341; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
342; CHECK-ALL-NEXT:    ret void
343;
344  %intermediate = alloca [32 x i8], align 64
345  %init = load <32 x i8>, ptr %src, align 1
346  store <32 x i8> %init, ptr %intermediate, align 64
347  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
348  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
349  store <4 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 4`
350  ret void
351}
352
; Fill a 32-byte alloca from %src, then load an 8-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 24].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
353define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
354; CHECK-ALL-LABEL: @load_8byte_chunk_of_32byte_alloca(
355; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
356; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
357; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
358; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
359; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
360; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
361; CHECK-ALL-NEXT:    ret void
362;
363  %intermediate = alloca [32 x i8], align 64
364  %init = load <32 x i8>, ptr %src, align 1
365  store <32 x i8> %init, ptr %intermediate, align 64
366  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
367  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
368  store <8 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 8`
369  ret void
370}
371
; Fill a 32-byte alloca from %src, then load a 16-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff in [0, 16].
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
372define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
373; CHECK-ALL-LABEL: @load_16byte_chunk_of_32byte_alloca(
374; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
375; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
376; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
377; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
378; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
379; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
380; CHECK-ALL-NEXT:    ret void
381;
382  %intermediate = alloca [32 x i8], align 64
383  %init = load <32 x i8>, ptr %src, align 1
384  store <32 x i8> %init, ptr %intermediate, align 64
385  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
386  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1
387  store <16 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 16`
388  ret void
389}
390
; Fill a 32-byte alloca from %src, then load a 32-byte chunk from it at the
; run-time byte offset %byteOff and store the chunk to %dst. The chunk lies
; inside the alloca only for %byteOff == 0.
; The assertions below repeat the input IR, so every RUN line expects this
; function to come out unchanged (the last store just gains a printed align).
; NOTE(review): presumably the non-constant offset is what blocks promotion
; of the alloca - confirm against the SROA implementation.
391define void @load_32byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
392; CHECK-ALL-LABEL: @load_32byte_chunk_of_32byte_alloca(
393; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
394; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
395; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
396; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
397; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <32 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
398; CHECK-ALL-NEXT:    store <32 x i8> [[CHUNK]], ptr [[DST:%.*]], align 32
399; CHECK-ALL-NEXT:    ret void
400;
401  %intermediate = alloca [32 x i8], align 64
402  %init = load <32 x i8>, ptr %src, align 1
403  store <32 x i8> %init, ptr %intermediate, align 64
404  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; byte offset is a function argument, not a constant
405  %chunk = load <32 x i8>, ptr %intermediate.off.addr, align 1
406  store <32 x i8> %chunk, ptr %dst ; no explicit alignment; the matching assertion above expects `align 32`
407  ret void
408}
409
410;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
411; CHECK-BE-32: {{.*}}
412; CHECK-BE-64: {{.*}}
413; CHECK-LE-32: {{.*}}
414; CHECK-LE-64: {{.*}}
415; CHECK-SCALAR: {{.*}}
416; CHECK-SCALAR-32: {{.*}}
417; CHECK-SCALAR-64: {{.*}}
418