xref: /llvm-project/llvm/test/CodeGen/ARM/load-combine-big-endian.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
3; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
4; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6
5; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7
6
7; ptr p; // p is 4 byte aligned
8; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
9define i32 @load_i32_by_i8_big_endian(ptr %arg) {
; Four i8 loads are OR-ed together most-significant byte first, which is the
; big-endian memory order, so every run expects one 32-bit ldr and no byte swap.
10; CHECK-LABEL: load_i32_by_i8_big_endian:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    ldr r0, [r0]
13; CHECK-NEXT:    mov pc, lr
14;
15; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian:
16; CHECK-ARMv6:       @ %bb.0:
17; CHECK-ARMv6-NEXT:    ldr r0, [r0]
18; CHECK-ARMv6-NEXT:    bx lr
19;
20; CHECK-THUMBv6-LABEL: load_i32_by_i8_big_endian:
21; CHECK-THUMBv6:       @ %bb.0:
22; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
23; CHECK-THUMBv6-NEXT:    bx lr
24;
25; CHECK-THUMBv7-LABEL: load_i32_by_i8_big_endian:
26; CHECK-THUMBv7:       @ %bb.0:
27; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
28; CHECK-THUMBv7-NEXT:    bx lr
29
30  %tmp1 = load i8, ptr %arg, align 4
31  %tmp2 = zext i8 %tmp1 to i32
32  %tmp3 = shl nuw nsw i32 %tmp2, 24
33  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
34  %tmp5 = load i8, ptr %tmp4, align 1
35  %tmp6 = zext i8 %tmp5 to i32
36  %tmp7 = shl nuw nsw i32 %tmp6, 16
37  %tmp8 = or i32 %tmp7, %tmp3
38  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
39  %tmp10 = load i8, ptr %tmp9, align 1
40  %tmp11 = zext i8 %tmp10 to i32
41  %tmp12 = shl nuw nsw i32 %tmp11, 8
42  %tmp13 = or i32 %tmp8, %tmp12
43  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
44  %tmp15 = load i8, ptr %tmp14, align 1
45  %tmp16 = zext i8 %tmp15 to i32
46  %tmp17 = or i32 %tmp13, %tmp16
47  ret i32 %tmp17
48}
49
50; ptr p; // p is 4 byte aligned
51; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
52define i32 @load_i32_by_i8_bswap(ptr %arg) {
53; The bytes are OR-ed together least-significant first, the reverse of big-endian
; memory order, so a single ldr must be followed by a byte swap. The armv6eb and
; Thumb runs expect rev; the default armeb run expects the swap expanded into
; shifts and masks.
54; CHECK-LABEL: load_i32_by_i8_bswap:
55; CHECK:       @ %bb.0:
56; CHECK-NEXT:    ldr r0, [r0]
57; CHECK-NEXT:    mov r1, #65280
58; CHECK-NEXT:    and r2, r0, #65280
59; CHECK-NEXT:    and r1, r1, r0, lsr #8
60; CHECK-NEXT:    orr r1, r1, r0, lsr #24
61; CHECK-NEXT:    lsl r0, r0, #24
62; CHECK-NEXT:    orr r0, r0, r2, lsl #8
63; CHECK-NEXT:    orr r0, r0, r1
64; CHECK-NEXT:    mov pc, lr
65;
66; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
67; CHECK-ARMv6:       @ %bb.0:
68; CHECK-ARMv6-NEXT:    ldr r0, [r0]
69; CHECK-ARMv6-NEXT:    rev r0, r0
70; CHECK-ARMv6-NEXT:    bx lr
71;
72; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap:
73; CHECK-THUMBv6:       @ %bb.0:
74; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
75; CHECK-THUMBv6-NEXT:    rev r0, r0
76; CHECK-THUMBv6-NEXT:    bx lr
77;
78; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap:
79; CHECK-THUMBv7:       @ %bb.0:
80; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
81; CHECK-THUMBv7-NEXT:    rev r0, r0
82; CHECK-THUMBv7-NEXT:    bx lr
83
84  %tmp2 = load i8, ptr %arg, align 4
85  %tmp3 = zext i8 %tmp2 to i32
86  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
87  %tmp5 = load i8, ptr %tmp4, align 1
88  %tmp6 = zext i8 %tmp5 to i32
89  %tmp7 = shl nuw nsw i32 %tmp6, 8
90  %tmp8 = or i32 %tmp7, %tmp3
91  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
92  %tmp10 = load i8, ptr %tmp9, align 1
93  %tmp11 = zext i8 %tmp10 to i32
94  %tmp12 = shl nuw nsw i32 %tmp11, 16
95  %tmp13 = or i32 %tmp8, %tmp12
96  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
97  %tmp15 = load i8, ptr %tmp14, align 1
98  %tmp16 = zext i8 %tmp15 to i32
99  %tmp17 = shl nuw nsw i32 %tmp16, 24
100  %tmp18 = or i32 %tmp13, %tmp17
101  ret i32 %tmp18
102}
103
104; ptr p; // p is 4 byte aligned
105; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
106define i32 @load_i32_by_i16_by_i8_big_endian(ptr %arg) {
; Bytes 0-1 and bytes 2-3 are first merged into two i16 values (high byte
; first), which are then widened and merged with the first pair in the upper
; half. Every run expects the whole tree to become one ldr with no byte swap.
107; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
108; CHECK:       @ %bb.0:
109; CHECK-NEXT:    ldr r0, [r0]
110; CHECK-NEXT:    mov pc, lr
111;
112; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian:
113; CHECK-ARMv6:       @ %bb.0:
114; CHECK-ARMv6-NEXT:    ldr r0, [r0]
115; CHECK-ARMv6-NEXT:    bx lr
116;
117; CHECK-THUMBv6-LABEL: load_i32_by_i16_by_i8_big_endian:
118; CHECK-THUMBv6:       @ %bb.0:
119; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
120; CHECK-THUMBv6-NEXT:    bx lr
121;
122; CHECK-THUMBv7-LABEL: load_i32_by_i16_by_i8_big_endian:
123; CHECK-THUMBv7:       @ %bb.0:
124; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
125; CHECK-THUMBv7-NEXT:    bx lr
126
127  %tmp1 = load i8, ptr %arg, align 4
128  %tmp2 = zext i8 %tmp1 to i16
129  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
130  %tmp4 = load i8, ptr %tmp3, align 1
131  %tmp5 = zext i8 %tmp4 to i16
132  %tmp6 = shl nuw nsw i16 %tmp2, 8
133  %tmp7 = or i16 %tmp6, %tmp5
134  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
135  %tmp9 = load i8, ptr %tmp8, align 1
136  %tmp10 = zext i8 %tmp9 to i16
137  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
138  %tmp12 = load i8, ptr %tmp11, align 1
139  %tmp13 = zext i8 %tmp12 to i16
140  %tmp14 = shl nuw nsw i16 %tmp10, 8
141  %tmp15 = or i16 %tmp14, %tmp13
142  %tmp16 = zext i16 %tmp7 to i32
143  %tmp17 = zext i16 %tmp15 to i32
144  %tmp18 = shl nuw nsw i32 %tmp16, 16
145  %tmp19 = or i32 %tmp18, %tmp17
146  ret i32 %tmp19
147}
148
149; ptr p; // p is 4 byte aligned
150; ((i32) p[0] << 16) | (i32) p[1]
151define i32 @load_i32_by_i16(ptr %arg) {
; Two adjacent i16 loads, the first placed in the upper half of the result.
; Every run expects them to be merged into one ldr with no byte swap.
152; CHECK-LABEL: load_i32_by_i16:
153; CHECK:       @ %bb.0:
154; CHECK-NEXT:    ldr r0, [r0]
155; CHECK-NEXT:    mov pc, lr
156;
157; CHECK-ARMv6-LABEL: load_i32_by_i16:
158; CHECK-ARMv6:       @ %bb.0:
159; CHECK-ARMv6-NEXT:    ldr r0, [r0]
160; CHECK-ARMv6-NEXT:    bx lr
161;
162; CHECK-THUMBv6-LABEL: load_i32_by_i16:
163; CHECK-THUMBv6:       @ %bb.0:
164; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
165; CHECK-THUMBv6-NEXT:    bx lr
166;
167; CHECK-THUMBv7-LABEL: load_i32_by_i16:
168; CHECK-THUMBv7:       @ %bb.0:
169; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
170; CHECK-THUMBv7-NEXT:    bx lr
171
172  %tmp1 = load i16, ptr %arg, align 4
173  %tmp2 = zext i16 %tmp1 to i32
174  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
175  %tmp4 = load i16, ptr %tmp3, align 1
176  %tmp5 = zext i16 %tmp4 to i32
177  %tmp6 = shl nuw nsw i32 %tmp2, 16
178  %tmp7 = or i32 %tmp6, %tmp5
179  ret i32 %tmp7
180}
181
182; ptr p_16; // p_16 is 4 byte aligned
183; ptr p_8 = (ptr) p_16;
184; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
185define i32 @load_i32_by_i16_i8(ptr %arg) {
; Mixed-width pattern: one i16 load supplies the upper half, and two i8 loads at
; byte offsets 2 and 3 supply the lower half (offset 2 above offset 3).
; Every run expects a single ldr with no byte swap.
186; CHECK-LABEL: load_i32_by_i16_i8:
187; CHECK:       @ %bb.0:
188; CHECK-NEXT:    ldr r0, [r0]
189; CHECK-NEXT:    mov pc, lr
190;
191; CHECK-ARMv6-LABEL: load_i32_by_i16_i8:
192; CHECK-ARMv6:       @ %bb.0:
193; CHECK-ARMv6-NEXT:    ldr r0, [r0]
194; CHECK-ARMv6-NEXT:    bx lr
195;
196; CHECK-THUMBv6-LABEL: load_i32_by_i16_i8:
197; CHECK-THUMBv6:       @ %bb.0:
198; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
199; CHECK-THUMBv6-NEXT:    bx lr
200;
201; CHECK-THUMBv7-LABEL: load_i32_by_i16_i8:
202; CHECK-THUMBv7:       @ %bb.0:
203; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
204; CHECK-THUMBv7-NEXT:    bx lr
205
206  %tmp2 = load i16, ptr %arg, align 4
207  %tmp3 = zext i16 %tmp2 to i32
208  %tmp4 = shl nuw nsw i32 %tmp3, 16
209  %tmp5 = getelementptr inbounds i8, ptr %arg, i32 2
210  %tmp6 = load i8, ptr %tmp5, align 1
211  %tmp7 = zext i8 %tmp6 to i32
212  %tmp8 = shl nuw nsw i32 %tmp7, 8
213  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
214  %tmp10 = load i8, ptr %tmp9, align 1
215  %tmp11 = zext i8 %tmp10 to i32
216  %tmp12 = or i32 %tmp8, %tmp11
217  %tmp13 = or i32 %tmp12, %tmp4
218  ret i32 %tmp13
219}
220
221; ptr p; // p is 8 byte aligned
222; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
223define i64 @load_i64_by_i8_bswap(ptr %arg) {
; Eight i8 loads OR-ed together least-significant byte first. Every run expects
; the two 32-bit words to be loaded (as one ldrd in the armv6eb run) and each
; byte-reversed, with the word from offset 4 ending up in r0 and the word from
; offset 0 in r1. The default armeb run expands each reversal into shifts and
; masks; the other runs use rev.
224; CHECK-LABEL: load_i64_by_i8_bswap:
225; CHECK:       @ %bb.0:
226; CHECK-NEXT:    ldr r1, [r0]
227; CHECK-NEXT:    mov r12, #65280
228; CHECK-NEXT:    ldr r0, [r0, #4]
229; CHECK-NEXT:    and r2, r0, #65280
230; CHECK-NEXT:    and r3, r12, r0, lsr #8
231; CHECK-NEXT:    orr r3, r3, r0, lsr #24
232; CHECK-NEXT:    lsl r0, r0, #24
233; CHECK-NEXT:    orr r0, r0, r2, lsl #8
234; CHECK-NEXT:    and r2, r12, r1, lsr #8
235; CHECK-NEXT:    orr r0, r0, r3
236; CHECK-NEXT:    and r3, r1, #65280
237; CHECK-NEXT:    orr r2, r2, r1, lsr #24
238; CHECK-NEXT:    lsl r1, r1, #24
239; CHECK-NEXT:    orr r1, r1, r3, lsl #8
240; CHECK-NEXT:    orr r1, r1, r2
241; CHECK-NEXT:    mov pc, lr
242;
243; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
244; CHECK-ARMv6:       @ %bb.0:
245; CHECK-ARMv6-NEXT:    ldrd r2, r3, [r0]
246; CHECK-ARMv6-NEXT:    rev r0, r3
247; CHECK-ARMv6-NEXT:    rev r1, r2
248; CHECK-ARMv6-NEXT:    bx lr
249;
250; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap:
251; CHECK-THUMBv6:       @ %bb.0:
252; CHECK-THUMBv6-NEXT:    ldr r1, [r0]
253; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #4]
254; CHECK-THUMBv6-NEXT:    rev r0, r0
255; CHECK-THUMBv6-NEXT:    rev r1, r1
256; CHECK-THUMBv6-NEXT:    bx lr
257;
258; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap:
259; CHECK-THUMBv7:       @ %bb.0:
260; CHECK-THUMBv7-NEXT:    ldr r1, [r0]
261; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #4]
262; CHECK-THUMBv7-NEXT:    rev r0, r0
263; CHECK-THUMBv7-NEXT:    rev r1, r1
264; CHECK-THUMBv7-NEXT:    bx lr
265
266  %tmp1 = load i8, ptr %arg, align 8
267  %tmp2 = zext i8 %tmp1 to i64
268  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
269  %tmp4 = load i8, ptr %tmp3, align 1
270  %tmp5 = zext i8 %tmp4 to i64
271  %tmp6 = shl nuw nsw i64 %tmp5, 8
272  %tmp7 = or i64 %tmp6, %tmp2
273  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
274  %tmp9 = load i8, ptr %tmp8, align 1
275  %tmp10 = zext i8 %tmp9 to i64
276  %tmp11 = shl nuw nsw i64 %tmp10, 16
277  %tmp12 = or i64 %tmp7, %tmp11
278  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
279  %tmp14 = load i8, ptr %tmp13, align 1
280  %tmp15 = zext i8 %tmp14 to i64
281  %tmp16 = shl nuw nsw i64 %tmp15, 24
282  %tmp17 = or i64 %tmp12, %tmp16
283  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
284  %tmp19 = load i8, ptr %tmp18, align 1
285  %tmp20 = zext i8 %tmp19 to i64
286  %tmp21 = shl nuw nsw i64 %tmp20, 32
287  %tmp22 = or i64 %tmp17, %tmp21
288  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
289  %tmp24 = load i8, ptr %tmp23, align 1
290  %tmp25 = zext i8 %tmp24 to i64
291  %tmp26 = shl nuw nsw i64 %tmp25, 40
292  %tmp27 = or i64 %tmp22, %tmp26
293  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
294  %tmp29 = load i8, ptr %tmp28, align 1
295  %tmp30 = zext i8 %tmp29 to i64
296  %tmp31 = shl nuw nsw i64 %tmp30, 48
297  %tmp32 = or i64 %tmp27, %tmp31
298  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
299  %tmp34 = load i8, ptr %tmp33, align 1
300  %tmp35 = zext i8 %tmp34 to i64
301  %tmp36 = shl nuw i64 %tmp35, 56
302  %tmp37 = or i64 %tmp32, %tmp36
303  ret i64 %tmp37
304}
305
306; ptr p; // p is 8 byte aligned
307; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
308define i64 @load_i64_by_i8(ptr %arg) {
; Eight i8 loads OR-ed together most-significant byte first (big-endian memory
; order). Every run expects two plain word loads (one ldrd in the armv6eb run)
; and no byte reversal.
309; CHECK-LABEL: load_i64_by_i8:
310; CHECK:       @ %bb.0:
311; CHECK-NEXT:    ldr r2, [r0]
312; CHECK-NEXT:    ldr r1, [r0, #4]
313; CHECK-NEXT:    mov r0, r2
314; CHECK-NEXT:    mov pc, lr
315;
316; CHECK-ARMv6-LABEL: load_i64_by_i8:
317; CHECK-ARMv6:       @ %bb.0:
318; CHECK-ARMv6-NEXT:    ldrd r0, r1, [r0]
319; CHECK-ARMv6-NEXT:    bx lr
320;
321; CHECK-THUMBv6-LABEL: load_i64_by_i8:
322; CHECK-THUMBv6:       @ %bb.0:
323; CHECK-THUMBv6-NEXT:    ldr r2, [r0]
324; CHECK-THUMBv6-NEXT:    ldr r1, [r0, #4]
325; CHECK-THUMBv6-NEXT:    mov r0, r2
326; CHECK-THUMBv6-NEXT:    bx lr
327;
328; CHECK-THUMBv7-LABEL: load_i64_by_i8:
329; CHECK-THUMBv7:       @ %bb.0:
330; CHECK-THUMBv7-NEXT:    ldr r2, [r0]
331; CHECK-THUMBv7-NEXT:    ldr r1, [r0, #4]
332; CHECK-THUMBv7-NEXT:    mov r0, r2
333; CHECK-THUMBv7-NEXT:    bx lr
334
335  %tmp1 = load i8, ptr %arg, align 8
336  %tmp2 = zext i8 %tmp1 to i64
337  %tmp3 = shl nuw i64 %tmp2, 56
338  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
339  %tmp5 = load i8, ptr %tmp4, align 1
340  %tmp6 = zext i8 %tmp5 to i64
341  %tmp7 = shl nuw nsw i64 %tmp6, 48
342  %tmp8 = or i64 %tmp7, %tmp3
343  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
344  %tmp10 = load i8, ptr %tmp9, align 1
345  %tmp11 = zext i8 %tmp10 to i64
346  %tmp12 = shl nuw nsw i64 %tmp11, 40
347  %tmp13 = or i64 %tmp8, %tmp12
348  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
349  %tmp15 = load i8, ptr %tmp14, align 1
350  %tmp16 = zext i8 %tmp15 to i64
351  %tmp17 = shl nuw nsw i64 %tmp16, 32
352  %tmp18 = or i64 %tmp13, %tmp17
353  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
354  %tmp20 = load i8, ptr %tmp19, align 1
355  %tmp21 = zext i8 %tmp20 to i64
356  %tmp22 = shl nuw nsw i64 %tmp21, 24
357  %tmp23 = or i64 %tmp18, %tmp22
358  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
359  %tmp25 = load i8, ptr %tmp24, align 1
360  %tmp26 = zext i8 %tmp25 to i64
361  %tmp27 = shl nuw nsw i64 %tmp26, 16
362  %tmp28 = or i64 %tmp23, %tmp27
363  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
364  %tmp30 = load i8, ptr %tmp29, align 1
365  %tmp31 = zext i8 %tmp30 to i64
366  %tmp32 = shl nuw nsw i64 %tmp31, 8
367  %tmp33 = or i64 %tmp28, %tmp32
368  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
369  %tmp35 = load i8, ptr %tmp34, align 1
370  %tmp36 = zext i8 %tmp35 to i64
371  %tmp37 = or i64 %tmp33, %tmp36
372  ret i64 %tmp37
373}
374
375; ptr p; // p[1] is 4 byte aligned
376; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
377define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; Same least-significant-first pattern as the bswap case, but starting at byte
; offset 1. Every run expects one word load from offset 1 followed by a byte
; swap; the Thumb runs put the offset in a register (movs + register-offset ldr).
378; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
379; CHECK:       @ %bb.0:
380; CHECK-NEXT:    ldr r0, [r0, #1]
381; CHECK-NEXT:    mov r1, #65280
382; CHECK-NEXT:    and r2, r0, #65280
383; CHECK-NEXT:    and r1, r1, r0, lsr #8
384; CHECK-NEXT:    orr r1, r1, r0, lsr #24
385; CHECK-NEXT:    lsl r0, r0, #24
386; CHECK-NEXT:    orr r0, r0, r2, lsl #8
387; CHECK-NEXT:    orr r0, r0, r1
388; CHECK-NEXT:    mov pc, lr
389;
390; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
391; CHECK-ARMv6:       @ %bb.0:
392; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
393; CHECK-ARMv6-NEXT:    rev r0, r0
394; CHECK-ARMv6-NEXT:    bx lr
395;
396; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset:
397; CHECK-THUMBv6:       @ %bb.0:
398; CHECK-THUMBv6-NEXT:    movs r1, #1
399; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
400; CHECK-THUMBv6-NEXT:    rev r0, r0
401; CHECK-THUMBv6-NEXT:    bx lr
402;
403; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset:
404; CHECK-THUMBv7:       @ %bb.0:
405; CHECK-THUMBv7-NEXT:    movs r1, #1
406; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
407; CHECK-THUMBv7-NEXT:    rev r0, r0
408; CHECK-THUMBv7-NEXT:    bx lr
409
410
411  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
412  %tmp2 = load i8, ptr %tmp1, align 4
413  %tmp3 = zext i8 %tmp2 to i32
414  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
415  %tmp5 = load i8, ptr %tmp4, align 1
416  %tmp6 = zext i8 %tmp5 to i32
417  %tmp7 = shl nuw nsw i32 %tmp6, 8
418  %tmp8 = or i32 %tmp7, %tmp3
419  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
420  %tmp10 = load i8, ptr %tmp9, align 1
421  %tmp11 = zext i8 %tmp10 to i32
422  %tmp12 = shl nuw nsw i32 %tmp11, 16
423  %tmp13 = or i32 %tmp8, %tmp12
424  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
425  %tmp15 = load i8, ptr %tmp14, align 1
426  %tmp16 = zext i8 %tmp15 to i32
427  %tmp17 = shl nuw nsw i32 %tmp16, 24
428  %tmp18 = or i32 %tmp13, %tmp17
429  ret i32 %tmp18
430}
431
432; ptr p; // p[-4] is 4 byte aligned
433; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
434define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; Least-significant-first pattern over bytes at offsets -4..-1. Every run
; expects one word load from offset -4 followed by a byte swap; the Thumb runs
; subtract 4 from the pointer first and then load from offset 0.
435; CHECK-LABEL: load_i32_by_i8_neg_offset:
436; CHECK:       @ %bb.0:
437; CHECK-NEXT:    ldr r0, [r0, #-4]
438; CHECK-NEXT:    mov r1, #65280
439; CHECK-NEXT:    and r2, r0, #65280
440; CHECK-NEXT:    and r1, r1, r0, lsr #8
441; CHECK-NEXT:    orr r1, r1, r0, lsr #24
442; CHECK-NEXT:    lsl r0, r0, #24
443; CHECK-NEXT:    orr r0, r0, r2, lsl #8
444; CHECK-NEXT:    orr r0, r0, r1
445; CHECK-NEXT:    mov pc, lr
446;
447; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
448; CHECK-ARMv6:       @ %bb.0:
449; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
450; CHECK-ARMv6-NEXT:    rev r0, r0
451; CHECK-ARMv6-NEXT:    bx lr
452;
453; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset:
454; CHECK-THUMBv6:       @ %bb.0:
455; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
456; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
457; CHECK-THUMBv6-NEXT:    rev r0, r0
458; CHECK-THUMBv6-NEXT:    bx lr
459;
460; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset:
461; CHECK-THUMBv7:       @ %bb.0:
462; CHECK-THUMBv7-NEXT:    subs r0, r0, #4
463; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
464; CHECK-THUMBv7-NEXT:    rev r0, r0
465; CHECK-THUMBv7-NEXT:    bx lr
466
467
468  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
469  %tmp2 = load i8, ptr %tmp1, align 4
470  %tmp3 = zext i8 %tmp2 to i32
471  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
472  %tmp5 = load i8, ptr %tmp4, align 1
473  %tmp6 = zext i8 %tmp5 to i32
474  %tmp7 = shl nuw nsw i32 %tmp6, 8
475  %tmp8 = or i32 %tmp7, %tmp3
476  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
477  %tmp10 = load i8, ptr %tmp9, align 1
478  %tmp11 = zext i8 %tmp10 to i32
479  %tmp12 = shl nuw nsw i32 %tmp11, 16
480  %tmp13 = or i32 %tmp8, %tmp12
481  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
482  %tmp15 = load i8, ptr %tmp14, align 1
483  %tmp16 = zext i8 %tmp15 to i32
484  %tmp17 = shl nuw nsw i32 %tmp16, 24
485  %tmp18 = or i32 %tmp13, %tmp17
486  ret i32 %tmp18
487}
488
489; ptr p; // p[1] is 4 byte aligned
490; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
491define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; Bytes at offsets 1..4 with the lowest address in the top byte of the result,
; which is big-endian memory order. Every run expects one word load from offset
; 1 and no byte swap; the Thumb runs put the offset in a register.
492; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
493; CHECK:       @ %bb.0:
494; CHECK-NEXT:    ldr r0, [r0, #1]
495; CHECK-NEXT:    mov pc, lr
496;
497; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
498; CHECK-ARMv6:       @ %bb.0:
499; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
500; CHECK-ARMv6-NEXT:    bx lr
501;
502; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
503; CHECK-THUMBv6:       @ %bb.0:
504; CHECK-THUMBv6-NEXT:    movs r1, #1
505; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
506; CHECK-THUMBv6-NEXT:    bx lr
507;
508; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap:
509; CHECK-THUMBv7:       @ %bb.0:
510; CHECK-THUMBv7-NEXT:    movs r1, #1
511; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
512; CHECK-THUMBv7-NEXT:    bx lr
513
514
515  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
516  %tmp2 = load i8, ptr %tmp1, align 1
517  %tmp3 = zext i8 %tmp2 to i32
518  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
519  %tmp5 = load i8, ptr %tmp4, align 1
520  %tmp6 = zext i8 %tmp5 to i32
521  %tmp7 = shl nuw nsw i32 %tmp6, 8
522  %tmp8 = or i32 %tmp7, %tmp3
523  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
524  %tmp10 = load i8, ptr %tmp9, align 1
525  %tmp11 = zext i8 %tmp10 to i32
526  %tmp12 = shl nuw nsw i32 %tmp11, 16
527  %tmp13 = or i32 %tmp8, %tmp12
528  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
529  %tmp15 = load i8, ptr %tmp14, align 4
530  %tmp16 = zext i8 %tmp15 to i32
531  %tmp17 = shl nuw nsw i32 %tmp16, 24
532  %tmp18 = or i32 %tmp13, %tmp17
533  ret i32 %tmp18
534}
535
536; ptr p; // p[-4] is 4 byte aligned
537; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
538define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; Bytes at offsets -4..-1 with the lowest address in the top byte of the result
; (big-endian memory order). Every run expects one word load from offset -4 and
; no byte swap; the Thumb runs subtract 4 from the pointer before loading.
539; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
540; CHECK:       @ %bb.0:
541; CHECK-NEXT:    ldr r0, [r0, #-4]
542; CHECK-NEXT:    mov pc, lr
543;
544; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
545; CHECK-ARMv6:       @ %bb.0:
546; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
547; CHECK-ARMv6-NEXT:    bx lr
548;
549; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap:
550; CHECK-THUMBv6:       @ %bb.0:
551; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
552; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
553; CHECK-THUMBv6-NEXT:    bx lr
554;
555; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap:
556; CHECK-THUMBv7:       @ %bb.0:
557; CHECK-THUMBv7-NEXT:    subs r0, r0, #4
558; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
559; CHECK-THUMBv7-NEXT:    bx lr
560
561
562  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
563  %tmp2 = load i8, ptr %tmp1, align 1
564  %tmp3 = zext i8 %tmp2 to i32
565  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
566  %tmp5 = load i8, ptr %tmp4, align 1
567  %tmp6 = zext i8 %tmp5 to i32
568  %tmp7 = shl nuw nsw i32 %tmp6, 8
569  %tmp8 = or i32 %tmp7, %tmp3
570  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
571  %tmp10 = load i8, ptr %tmp9, align 1
572  %tmp11 = zext i8 %tmp10 to i32
573  %tmp12 = shl nuw nsw i32 %tmp11, 16
574  %tmp13 = or i32 %tmp8, %tmp12
575  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
576  %tmp15 = load i8, ptr %tmp14, align 4
577  %tmp16 = zext i8 %tmp15 to i32
578  %tmp17 = shl nuw nsw i32 %tmp16, 24
579  %tmp18 = or i32 %tmp13, %tmp17
580  ret i32 %tmp18
581}
582
; 16-bit byte-swap intrinsic, called by @load_i32_by_bswap_i16 in this file.
583declare i16 @llvm.bswap.i16(i16)
584
585; ptr p; // p is 4 byte aligned
586; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
587define i32 @load_i32_by_bswap_i16(ptr %arg) {
; Two adjacent i16 loads, each passed through llvm.bswap.i16, with the first
; in the lower half of the result. Every run expects this to fold into one
; word load followed by a 32-bit byte swap (rev, or the shift/mask expansion
; in the default armeb run).
588; CHECK-LABEL: load_i32_by_bswap_i16:
589; CHECK:       @ %bb.0:
590; CHECK-NEXT:    ldr r0, [r0]
591; CHECK-NEXT:    mov r1, #65280
592; CHECK-NEXT:    and r2, r0, #65280
593; CHECK-NEXT:    and r1, r1, r0, lsr #8
594; CHECK-NEXT:    orr r1, r1, r0, lsr #24
595; CHECK-NEXT:    lsl r0, r0, #24
596; CHECK-NEXT:    orr r0, r0, r2, lsl #8
597; CHECK-NEXT:    orr r0, r0, r1
598; CHECK-NEXT:    mov pc, lr
599;
600; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
601; CHECK-ARMv6:       @ %bb.0:
602; CHECK-ARMv6-NEXT:    ldr r0, [r0]
603; CHECK-ARMv6-NEXT:    rev r0, r0
604; CHECK-ARMv6-NEXT:    bx lr
605;
606; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16:
607; CHECK-THUMBv6:       @ %bb.0:
608; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
609; CHECK-THUMBv6-NEXT:    rev r0, r0
610; CHECK-THUMBv6-NEXT:    bx lr
611;
612; CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16:
613; CHECK-THUMBv7:       @ %bb.0:
614; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
615; CHECK-THUMBv7-NEXT:    rev r0, r0
616; CHECK-THUMBv7-NEXT:    bx lr
617
618
619  %tmp1 = load i16, ptr %arg, align 4
620  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
621  %tmp2 = zext i16 %tmp11 to i32
622  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
623  %tmp4 = load i16, ptr %tmp3, align 1
624  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
625  %tmp5 = zext i16 %tmp41 to i32
626  %tmp6 = shl nuw nsw i32 %tmp5, 16
627  %tmp7 = or i32 %tmp6, %tmp2
628  ret i32 %tmp7
629}
630
631; ptr p; // p is 4 byte aligned
632; (i32) p[1] | ((sext(p[0]) to i32) << 16)
633define i32 @load_i32_by_sext_i16(ptr %arg) {
; Like the two-i16 case, except the first halfword is sign-extended (not
; zero-extended) before being shifted left by 16, so the extension bits are
; shifted out. Every run still expects a single ldr with no byte swap.
634; CHECK-LABEL: load_i32_by_sext_i16:
635; CHECK:       @ %bb.0:
636; CHECK-NEXT:    ldr r0, [r0]
637; CHECK-NEXT:    mov pc, lr
638;
639; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
640; CHECK-ARMv6:       @ %bb.0:
641; CHECK-ARMv6-NEXT:    ldr r0, [r0]
642; CHECK-ARMv6-NEXT:    bx lr
643;
644; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16:
645; CHECK-THUMBv6:       @ %bb.0:
646; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
647; CHECK-THUMBv6-NEXT:    bx lr
648;
649; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16:
650; CHECK-THUMBv7:       @ %bb.0:
651; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
652; CHECK-THUMBv7-NEXT:    bx lr
653  %tmp1 = load i16, ptr %arg, align 4
654  %tmp2 = sext i16 %tmp1 to i32
655  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
656  %tmp4 = load i16, ptr %tmp3, align 1
657  %tmp5 = zext i16 %tmp4 to i32
658  %tmp6 = shl nuw nsw i32 %tmp2, 16
659  %tmp7 = or i32 %tmp6, %tmp5
660  ret i32 %tmp7
661}
662
663; ptr arg; i32 i;
664; p = arg + 12;
665; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
666define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; Byte addresses are arg + 12 + (i, i+1, i+2, i+3), combined least-significant
; first. Every run expects the base and index to be added once, one word load
; at constant offset 12, and then a byte swap.
667; CHECK-LABEL: load_i32_by_i8_base_offset_index:
668; CHECK:       @ %bb.0:
669; CHECK-NEXT:    add r0, r0, r1
670; CHECK-NEXT:    mov r1, #65280
671; CHECK-NEXT:    ldr r0, [r0, #12]
672; CHECK-NEXT:    and r2, r0, #65280
673; CHECK-NEXT:    and r1, r1, r0, lsr #8
674; CHECK-NEXT:    orr r1, r1, r0, lsr #24
675; CHECK-NEXT:    lsl r0, r0, #24
676; CHECK-NEXT:    orr r0, r0, r2, lsl #8
677; CHECK-NEXT:    orr r0, r0, r1
678; CHECK-NEXT:    mov pc, lr
679;
680; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
681; CHECK-ARMv6:       @ %bb.0:
682; CHECK-ARMv6-NEXT:    add r0, r0, r1
683; CHECK-ARMv6-NEXT:    ldr r0, [r0, #12]
684; CHECK-ARMv6-NEXT:    rev r0, r0
685; CHECK-ARMv6-NEXT:    bx lr
686;
687; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index:
688; CHECK-THUMBv6:       @ %bb.0:
689; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
690; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #12]
691; CHECK-THUMBv6-NEXT:    rev r0, r0
692; CHECK-THUMBv6-NEXT:    bx lr
693;
694; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index:
695; CHECK-THUMBv7:       @ %bb.0:
696; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
697; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #12]
698; CHECK-THUMBv7-NEXT:    rev r0, r0
699; CHECK-THUMBv7-NEXT:    bx lr
700  %tmp = add nuw nsw i32 %i, 3
701  %tmp2 = add nuw nsw i32 %i, 2
702  %tmp3 = add nuw nsw i32 %i, 1
703  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
704  %tmp5 = zext i32 %i to i64
705  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
706  %tmp7 = load i8, ptr %tmp6, align 4
707  %tmp8 = zext i8 %tmp7 to i32
708  %tmp9 = zext i32 %tmp3 to i64
709  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
710  %tmp11 = load i8, ptr %tmp10, align 1
711  %tmp12 = zext i8 %tmp11 to i32
712  %tmp13 = shl nuw nsw i32 %tmp12, 8
713  %tmp14 = or i32 %tmp13, %tmp8
714  %tmp15 = zext i32 %tmp2 to i64
715  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
716  %tmp17 = load i8, ptr %tmp16, align 1
717  %tmp18 = zext i8 %tmp17 to i32
718  %tmp19 = shl nuw nsw i32 %tmp18, 16
719  %tmp20 = or i32 %tmp14, %tmp19
720  %tmp21 = zext i32 %tmp to i64
721  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
722  %tmp23 = load i8, ptr %tmp22, align 1
723  %tmp24 = zext i8 %tmp23 to i32
724  %tmp25 = shl nuw i32 %tmp24, 24
725  %tmp26 = or i32 %tmp20, %tmp25
726  ret i32 %tmp26
727}
728
729; ptr arg; i32 i;
730; p = arg + 12;
731; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
732define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; Variant of the base+offset+index case where the first byte is at index i+1,
; so the byte addresses are arg + 12 + (i+1 .. i+4). Every run expects the two
; constants to fold into a single offset of 13 on one word load, followed by a
; byte swap; the Thumb runs put the 13 in a register.
733; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
734; CHECK:       @ %bb.0:
735; CHECK-NEXT:    add r0, r1, r0
736; CHECK-NEXT:    mov r1, #65280
737; CHECK-NEXT:    ldr r0, [r0, #13]
738; CHECK-NEXT:    and r2, r0, #65280
739; CHECK-NEXT:    and r1, r1, r0, lsr #8
740; CHECK-NEXT:    orr r1, r1, r0, lsr #24
741; CHECK-NEXT:    lsl r0, r0, #24
742; CHECK-NEXT:    orr r0, r0, r2, lsl #8
743; CHECK-NEXT:    orr r0, r0, r1
744; CHECK-NEXT:    mov pc, lr
745;
746; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
747; CHECK-ARMv6:       @ %bb.0:
748; CHECK-ARMv6-NEXT:    add r0, r1, r0
749; CHECK-ARMv6-NEXT:    ldr r0, [r0, #13]
750; CHECK-ARMv6-NEXT:    rev r0, r0
751; CHECK-ARMv6-NEXT:    bx lr
752;
753; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2:
754; CHECK-THUMBv6:       @ %bb.0:
755; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
756; CHECK-THUMBv6-NEXT:    movs r1, #13
757; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
758; CHECK-THUMBv6-NEXT:    rev r0, r0
759; CHECK-THUMBv6-NEXT:    bx lr
760;
761; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2:
762; CHECK-THUMBv7:       @ %bb.0:
763; CHECK-THUMBv7-NEXT:    adds r0, r1, r0
764; CHECK-THUMBv7-NEXT:    movs r1, #13
765; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
766; CHECK-THUMBv7-NEXT:    rev r0, r0
767; CHECK-THUMBv7-NEXT:    bx lr
768
769  %tmp = add nuw nsw i32 %i, 4
770  %tmp2 = add nuw nsw i32 %i, 3
771  %tmp3 = add nuw nsw i32 %i, 2
772  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
773  %tmp5 = add nuw nsw i32 %i, 1
774  %tmp27 = zext i32 %tmp5 to i64
775  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
776  %tmp29 = load i8, ptr %tmp28, align 4
777  %tmp30 = zext i8 %tmp29 to i32
778  %tmp31 = zext i32 %tmp3 to i64
779  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
780  %tmp33 = load i8, ptr %tmp32, align 1
781  %tmp34 = zext i8 %tmp33 to i32
782  %tmp35 = shl nuw nsw i32 %tmp34, 8
783  %tmp36 = or i32 %tmp35, %tmp30
784  %tmp37 = zext i32 %tmp2 to i64
785  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
786  %tmp39 = load i8, ptr %tmp38, align 1
787  %tmp40 = zext i8 %tmp39 to i32
788  %tmp41 = shl nuw nsw i32 %tmp40, 16
789  %tmp42 = or i32 %tmp36, %tmp41
790  %tmp43 = zext i32 %tmp to i64
791  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
792  %tmp45 = load i8, ptr %tmp44, align 1
793  %tmp46 = zext i8 %tmp45 to i32
794  %tmp47 = shl nuw i32 %tmp46, 24
795  %tmp48 = or i32 %tmp42, %tmp47
796  ret i32 %tmp48
797}
798
799; ptr p; // p is 2 byte aligned
800; (i32) p[0] | ((i32) p[1] << 8)
801define i32 @zext_load_i32_by_i8(ptr %arg) {
; Only two bytes are combined (least-significant first) into the low 16 bits of
; an i32. The armv6eb and Thumb runs expect a halfword load plus rev16; the
; default armeb run expects the two ldrb instructions to remain, joined by orr.
802; CHECK-LABEL: zext_load_i32_by_i8:
803; CHECK:       @ %bb.0:
804; CHECK-NEXT:    ldrb r1, [r0]
805; CHECK-NEXT:    ldrb r0, [r0, #1]
806; CHECK-NEXT:    orr r0, r1, r0, lsl #8
807; CHECK-NEXT:    mov pc, lr
808;
809; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
810; CHECK-ARMv6:       @ %bb.0:
811; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
812; CHECK-ARMv6-NEXT:    rev16 r0, r0
813; CHECK-ARMv6-NEXT:    bx lr
814;
815; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
816; CHECK-THUMBv6:       @ %bb.0:
817; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
818; CHECK-THUMBv6-NEXT:    rev16 r0, r0
819; CHECK-THUMBv6-NEXT:    bx lr
820;
821; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
822; CHECK-THUMBv7:       @ %bb.0:
823; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
824; CHECK-THUMBv7-NEXT:    rev16 r0, r0
825; CHECK-THUMBv7-NEXT:    bx lr
826
827  %tmp2 = load i8, ptr %arg, align 2
828  %tmp3 = zext i8 %tmp2 to i32
829  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
830  %tmp5 = load i8, ptr %tmp4, align 1
831  %tmp6 = zext i8 %tmp5 to i32
832  %tmp7 = shl nuw nsw i32 %tmp6, 8
833  %tmp8 = or i32 %tmp7, %tmp3
834  ret i32 %tmp8
835}
836
837; ptr p; // p is 2 byte aligned
838; ((i32) p[0] << 8) | ((i32) p[1] << 16)
839define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
; Two bytes placed at bit positions 8 and 16 of the result. No run merges the
; loads here: each expects two ldrb instructions followed by shifts and a
; combine (orr in the ARM runs, adds in the Thumb runs).
840; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
841; CHECK:       @ %bb.0:
842; CHECK-NEXT:    ldrb r1, [r0]
843; CHECK-NEXT:    ldrb r0, [r0, #1]
844; CHECK-NEXT:    lsl r0, r0, #16
845; CHECK-NEXT:    orr r0, r0, r1, lsl #8
846; CHECK-NEXT:    mov pc, lr
847;
848; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
849; CHECK-ARMv6:       @ %bb.0:
850; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
851; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
852; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
853; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
854; CHECK-ARMv6-NEXT:    bx lr
855;
856; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8:
857; CHECK-THUMBv6:       @ %bb.0:
858; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
859; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
860; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
861; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
862; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
863; CHECK-THUMBv6-NEXT:    bx lr
864;
865; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8:
866; CHECK-THUMBv7:       @ %bb.0:
867; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
868; CHECK-THUMBv7-NEXT:    lsls r1, r1, #8
869; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
870; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
871; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
872; CHECK-THUMBv7-NEXT:    bx lr
873
874  %tmp2 = load i8, ptr %arg, align 2
875  %tmp3 = zext i8 %tmp2 to i32
876  %tmp30 = shl nuw nsw i32 %tmp3, 8
877  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
878  %tmp5 = load i8, ptr %tmp4, align 1
879  %tmp6 = zext i8 %tmp5 to i32
880  %tmp7 = shl nuw nsw i32 %tmp6, 16
881  %tmp8 = or i32 %tmp7, %tmp30
882  ret i32 %tmp8
883}
884
885; ptr p; // p is 2 byte aligned
886; ((i32) p[0] << 16) | ((i32) p[1] << 24)
887define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
888; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
889; CHECK:       @ %bb.0:
890; CHECK-NEXT:    ldrb r1, [r0]
891; CHECK-NEXT:    ldrb r0, [r0, #1]
892; CHECK-NEXT:    lsl r0, r0, #24
893; CHECK-NEXT:    orr r0, r0, r1, lsl #16
894; CHECK-NEXT:    mov pc, lr
895;
896; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
897; CHECK-ARMv6:       @ %bb.0:
898; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
899; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
900; CHECK-ARMv6-NEXT:    lsl r0, r0, #24
901; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #16
902; CHECK-ARMv6-NEXT:    bx lr
903;
904; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16:
905; CHECK-THUMBv6:       @ %bb.0:
906; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
907; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
908; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
909; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
910; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
911; CHECK-THUMBv6-NEXT:    bx lr
912;
913; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16:
914; CHECK-THUMBv7:       @ %bb.0:
915; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
916; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
917; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
918; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
919; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
920; CHECK-THUMBv7-NEXT:    bx lr
921
; Builds an i32 from two zero-extended bytes: p[0] lands in bits 16..23 and
; p[1] in bits 24..31. The expected assembly for every prefix above keeps two
; separate ldrb instructions, so no single halfword load is expected here.
922  %tmp2 = load i8, ptr %arg, align 2                    ; p[0]; carries the 2-byte alignment of p
923  %tmp3 = zext i8 %tmp2 to i32
924  %tmp30 = shl nuw nsw i32 %tmp3, 16                    ; (i32) p[0] << 16
925  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1    ; &p[1]
926  %tmp5 = load i8, ptr %tmp4, align 1                   ; p[1]
927  %tmp6 = zext i8 %tmp5 to i32
928  %tmp7 = shl nuw nsw i32 %tmp6, 24                     ; (i32) p[1] << 24
929  %tmp8 = or i32 %tmp7, %tmp30                          ; merge the two shifted bytes
930  ret i32 %tmp8
931}
932
933; ptr p; // p is 2 byte aligned
934; (i32) p[1] | ((i32) p[0] << 8)
935define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
936; CHECK-LABEL: zext_load_i32_by_i8_bswap:
937; CHECK:       @ %bb.0:
938; CHECK-NEXT:    ldrh r0, [r0]
939; CHECK-NEXT:    mov pc, lr
940;
941; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
942; CHECK-ARMv6:       @ %bb.0:
943; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
944; CHECK-ARMv6-NEXT:    bx lr
945;
946; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
947; CHECK-THUMBv6:       @ %bb.0:
948; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
949; CHECK-THUMBv6-NEXT:    bx lr
950;
951; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
952; CHECK-THUMBv7:       @ %bb.0:
953; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
954; CHECK-THUMBv7-NEXT:    bx lr
955
; Builds an i32 whose low byte is p[1] and whose next byte is p[0]; the upper
; 16 bits stay zero. The expected assembly for every prefix above is a single
; ldrh from p with no further byte manipulation.
956  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1    ; &p[1]
957  %tmp2 = load i8, ptr %tmp1, align 1                   ; p[1]
958  %tmp3 = zext i8 %tmp2 to i32                          ; (i32) p[1], unshifted low byte
959  %tmp5 = load i8, ptr %arg, align 2                    ; p[0]; carries the 2-byte alignment of p
960  %tmp6 = zext i8 %tmp5 to i32
961  %tmp7 = shl nuw nsw i32 %tmp6, 8                      ; (i32) p[0] << 8
962  %tmp8 = or i32 %tmp7, %tmp3                           ; merge the two bytes
963  ret i32 %tmp8
964}
965
966; ptr p; // p is 2 byte aligned
967; ((i32) p[1] << 8) | ((i32) p[0] << 16)
968define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
969; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
970; CHECK:       @ %bb.0:
971; CHECK-NEXT:    ldrb r1, [r0]
972; CHECK-NEXT:    ldrb r0, [r0, #1]
973; CHECK-NEXT:    lsl r1, r1, #16
974; CHECK-NEXT:    orr r0, r1, r0, lsl #8
975; CHECK-NEXT:    mov pc, lr
976;
977; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
978; CHECK-ARMv6:       @ %bb.0:
979; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
980; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
981; CHECK-ARMv6-NEXT:    lsl r1, r1, #16
982; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #8
983; CHECK-ARMv6-NEXT:    bx lr
984;
985; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
986; CHECK-THUMBv6:       @ %bb.0:
987; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
988; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
989; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
990; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
991; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
992; CHECK-THUMBv6-NEXT:    bx lr
993;
994; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
995; CHECK-THUMBv7:       @ %bb.0:
996; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
997; CHECK-THUMBv7-NEXT:    lsls r1, r1, #8
998; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
999; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
1000; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1001; CHECK-THUMBv7-NEXT:    bx lr
1002
; Builds an i32 from two zero-extended bytes: p[1] lands in bits 8..15 and
; p[0] in bits 16..23. The expected assembly for every prefix above keeps two
; separate ldrb instructions, so no single halfword load is expected here.
1003  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1    ; &p[1]
1004  %tmp2 = load i8, ptr %tmp1, align 1                   ; p[1]
1005  %tmp3 = zext i8 %tmp2 to i32
1006  %tmp30 = shl nuw nsw i32 %tmp3, 8                     ; (i32) p[1] << 8
1007  %tmp5 = load i8, ptr %arg, align 2                    ; p[0]; carries the 2-byte alignment of p
1008  %tmp6 = zext i8 %tmp5 to i32
1009  %tmp7 = shl nuw nsw i32 %tmp6, 16                     ; (i32) p[0] << 16
1010  %tmp8 = or i32 %tmp7, %tmp30                          ; merge the two shifted bytes
1011  ret i32 %tmp8
1012}
1013
1014; ptr p; // p is 2 byte aligned
1015; ((i32) p[1] << 16) | ((i32) p[0] << 24)
1016define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
1017; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1018; CHECK:       @ %bb.0:
1019; CHECK-NEXT:    ldrb r1, [r0]
1020; CHECK-NEXT:    ldrb r0, [r0, #1]
1021; CHECK-NEXT:    lsl r1, r1, #24
1022; CHECK-NEXT:    orr r0, r1, r0, lsl #16
1023; CHECK-NEXT:    mov pc, lr
1024;
1025; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1026; CHECK-ARMv6:       @ %bb.0:
1027; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
1028; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
1029; CHECK-ARMv6-NEXT:    lsl r1, r1, #24
1030; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #16
1031; CHECK-ARMv6-NEXT:    bx lr
1032;
1033; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1034; CHECK-THUMBv6:       @ %bb.0:
1035; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
1036; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
1037; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
1038; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
1039; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
1040; CHECK-THUMBv6-NEXT:    bx lr
1041;
1042; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1043; CHECK-THUMBv7:       @ %bb.0:
1044; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
1045; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
1046; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
1047; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
1048; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1049; CHECK-THUMBv7-NEXT:    bx lr
1050
; Builds an i32 from two zero-extended bytes: p[1] lands in bits 16..23 and
; p[0] in bits 24..31. The expected assembly for every prefix above keeps two
; separate ldrb instructions, so no single halfword load is expected here.
1051  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1    ; &p[1]
1052  %tmp2 = load i8, ptr %tmp1, align 1                   ; p[1]
1053  %tmp3 = zext i8 %tmp2 to i32
1054  %tmp30 = shl nuw nsw i32 %tmp3, 16                    ; (i32) p[1] << 16
1055  %tmp5 = load i8, ptr %arg, align 2                    ; p[0]; carries the 2-byte alignment of p
1056  %tmp6 = zext i8 %tmp5 to i32
1057  %tmp7 = shl nuw nsw i32 %tmp6, 24                     ; (i32) p[0] << 24
1058  %tmp8 = or i32 %tmp7, %tmp30                          ; merge the two shifted bytes
1059  ret i32 %tmp8
1060}
1061
1062; ptr p;
1063; ptr p1.i16 = (ptr) p;
1064; (p1.i16[0] << 8) | ((i16) p[2])
1065;
1066; This is essentially an i16 load from p[1], but we don't fold the pattern now
1067; because in the original DAG we don't have the p[1] address available
1068define i16 @load_i16_from_nonzero_offset(ptr %p) {
1069; CHECK-LABEL: load_i16_from_nonzero_offset:
1070; CHECK:       @ %bb.0:
1071; CHECK-NEXT:    ldrh r1, [r0]
1072; CHECK-NEXT:    ldrb r0, [r0, #2]
1073; CHECK-NEXT:    orr r0, r0, r1, lsl #8
1074; CHECK-NEXT:    mov pc, lr
1075;
1076; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
1077; CHECK-ARMv6:       @ %bb.0:
1078; CHECK-ARMv6-NEXT:    ldrh r1, [r0]
1079; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #2]
1080; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
1081; CHECK-ARMv6-NEXT:    bx lr
1082;
1083; CHECK-THUMBv6-LABEL: load_i16_from_nonzero_offset:
1084; CHECK-THUMBv6:       @ %bb.0:
1085; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #2]
1086; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
1087; CHECK-THUMBv6-NEXT:    lsls r0, r0, #8
1088; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
1089; CHECK-THUMBv6-NEXT:    bx lr
1090;
1091; CHECK-THUMBv7-LABEL: load_i16_from_nonzero_offset:
1092; CHECK-THUMBv7:       @ %bb.0:
1093; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #2]
1094; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
1095; CHECK-THUMBv7-NEXT:    lsls r0, r0, #8
1096; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1097; CHECK-THUMBv7-NEXT:    bx lr
1098
; Combines an i16 load at p (shifted left by 8 within i16) with the zero-extended
; byte at p + 2. The expected assembly for every prefix above keeps the ldrh and
; the ldrb as two separate loads.
1099  %p2.i8 = getelementptr i8, ptr %p, i64 2    ; address of p[2]
1100  %v1 = load i16, ptr %p                      ; i16 load at p, no explicit alignment given
1101  %v2.i8 = load i8, ptr %p2.i8                ; p[2]
1102  %v2 = zext i8 %v2.i8 to i16                 ; p[2] becomes the low byte of the result
1103  %v1.shl = shl i16 %v1, 8                    ; low byte of %v1 moves to the high byte; old high byte is shifted out
1104  %res = or i16 %v1.shl, %v2
1105  ret i16 %res
1106}
1107