xref: /llvm-project/llvm/test/CodeGen/ARM/load-combine.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-unknown | FileCheck %s
3; RUN: llc < %s -mtriple=armv6-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
4; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6
5; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7
6
7; ptr p; // p is 1 byte aligned
8; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
; Expected codegen: the plain ARM, ARMv6 and Thumbv6 runs keep four separate
; ldrb loads; only the Thumbv7 run folds them into a single ldr.
9define i32 @load_i32_by_i8_unaligned(ptr %arg) {
10; CHECK-LABEL: load_i32_by_i8_unaligned:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    ldrb r2, [r0, #1]
13; CHECK-NEXT:    ldrb r1, [r0]
14; CHECK-NEXT:    ldrb r3, [r0, #2]
15; CHECK-NEXT:    ldrb r0, [r0, #3]
16; CHECK-NEXT:    orr r1, r1, r2, lsl #8
17; CHECK-NEXT:    orr r1, r1, r3, lsl #16
18; CHECK-NEXT:    orr r0, r1, r0, lsl #24
19; CHECK-NEXT:    mov pc, lr
20;
21; CHECK-ARMv6-LABEL: load_i32_by_i8_unaligned:
22; CHECK-ARMv6:       @ %bb.0:
23; CHECK-ARMv6-NEXT:    ldrb r2, [r0, #1]
24; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
25; CHECK-ARMv6-NEXT:    ldrb r3, [r0, #2]
26; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #3]
27; CHECK-ARMv6-NEXT:    orr r1, r1, r2, lsl #8
28; CHECK-ARMv6-NEXT:    orr r1, r1, r3, lsl #16
29; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #24
30; CHECK-ARMv6-NEXT:    bx lr
31;
32; CHECK-THUMBv6-LABEL: load_i32_by_i8_unaligned:
33; CHECK-THUMBv6:       @ %bb.0:
34; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
35; CHECK-THUMBv6-NEXT:    ldrb r2, [r0, #1]
36; CHECK-THUMBv6-NEXT:    lsls r2, r2, #8
37; CHECK-THUMBv6-NEXT:    adds r1, r2, r1
38; CHECK-THUMBv6-NEXT:    ldrb r2, [r0, #2]
39; CHECK-THUMBv6-NEXT:    lsls r2, r2, #16
40; CHECK-THUMBv6-NEXT:    adds r1, r1, r2
41; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #3]
42; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
43; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
44; CHECK-THUMBv6-NEXT:    bx lr
45;
46; CHECK-THUMBv7-LABEL: load_i32_by_i8_unaligned:
47; CHECK-THUMBv7:       @ %bb.0:
48; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
49; CHECK-THUMBv7-NEXT:    bx lr
50
51  %tmp2 = load i8, ptr %arg, align 1
52  %tmp3 = zext i8 %tmp2 to i32
53  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
54  %tmp5 = load i8, ptr %tmp4, align 1
55  %tmp6 = zext i8 %tmp5 to i32
56  %tmp7 = shl nuw nsw i32 %tmp6, 8
57  %tmp8 = or i32 %tmp7, %tmp3
58  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
59  %tmp10 = load i8, ptr %tmp9, align 1
60  %tmp11 = zext i8 %tmp10 to i32
61  %tmp12 = shl nuw nsw i32 %tmp11, 16
62  %tmp13 = or i32 %tmp8, %tmp12
63  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
64  %tmp15 = load i8, ptr %tmp14, align 1
65  %tmp16 = zext i8 %tmp15 to i32
66  %tmp17 = shl nuw nsw i32 %tmp16, 24
67  %tmp18 = or i32 %tmp13, %tmp17
68  ret i32 %tmp18
69}
70
71; ptr p; // p is 4 byte aligned
72; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
; The first byte load carries align 4; all four runs expect the byte loads
; to fold into a single ldr.
73define i32 @load_i32_by_i8_aligned(ptr %arg) {
74; CHECK-LABEL: load_i32_by_i8_aligned:
75; CHECK:       @ %bb.0:
76; CHECK-NEXT:    ldr r0, [r0]
77; CHECK-NEXT:    mov pc, lr
78;
79; CHECK-ARMv6-LABEL: load_i32_by_i8_aligned:
80; CHECK-ARMv6:       @ %bb.0:
81; CHECK-ARMv6-NEXT:    ldr r0, [r0]
82; CHECK-ARMv6-NEXT:    bx lr
83;
84; CHECK-THUMBv6-LABEL: load_i32_by_i8_aligned:
85; CHECK-THUMBv6:       @ %bb.0:
86; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
87; CHECK-THUMBv6-NEXT:    bx lr
88;
89; CHECK-THUMBv7-LABEL: load_i32_by_i8_aligned:
90; CHECK-THUMBv7:       @ %bb.0:
91; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
92; CHECK-THUMBv7-NEXT:    bx lr
93
94  %tmp2 = load i8, ptr %arg, align 4
95  %tmp3 = zext i8 %tmp2 to i32
96  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
97  %tmp5 = load i8, ptr %tmp4, align 1
98  %tmp6 = zext i8 %tmp5 to i32
99  %tmp7 = shl nuw nsw i32 %tmp6, 8
100  %tmp8 = or i32 %tmp7, %tmp3
101  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
102  %tmp10 = load i8, ptr %tmp9, align 1
103  %tmp11 = zext i8 %tmp10 to i32
104  %tmp12 = shl nuw nsw i32 %tmp11, 16
105  %tmp13 = or i32 %tmp8, %tmp12
106  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
107  %tmp15 = load i8, ptr %tmp14, align 1
108  %tmp16 = zext i8 %tmp15 to i32
109  %tmp17 = shl nuw nsw i32 %tmp16, 24
110  %tmp18 = or i32 %tmp13, %tmp17
111  ret i32 %tmp18
112}
113
114; ptr p; // p is 4 byte aligned
115; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
116define i32 @load_i32_by_i8_bswap(ptr %arg) {
117; All runs expect one ldr. The plain ARM run expects the byte swap expanded into and/orr/lsl; the ARMv6, Thumbv6 and Thumbv7 runs expect a single rev.
118; CHECK-LABEL: load_i32_by_i8_bswap:
119; CHECK:       @ %bb.0:
120; CHECK-NEXT:    ldr r0, [r0]
121; CHECK-NEXT:    mov r1, #65280
122; CHECK-NEXT:    and r2, r0, #65280
123; CHECK-NEXT:    and r1, r1, r0, lsr #8
124; CHECK-NEXT:    orr r1, r1, r0, lsr #24
125; CHECK-NEXT:    lsl r0, r0, #24
126; CHECK-NEXT:    orr r0, r0, r2, lsl #8
127; CHECK-NEXT:    orr r0, r0, r1
128; CHECK-NEXT:    mov pc, lr
129;
130; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
131; CHECK-ARMv6:       @ %bb.0:
132; CHECK-ARMv6-NEXT:    ldr r0, [r0]
133; CHECK-ARMv6-NEXT:    rev r0, r0
134; CHECK-ARMv6-NEXT:    bx lr
135;
136; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap:
137; CHECK-THUMBv6:       @ %bb.0:
138; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
139; CHECK-THUMBv6-NEXT:    rev r0, r0
140; CHECK-THUMBv6-NEXT:    bx lr
141;
142; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap:
143; CHECK-THUMBv7:       @ %bb.0:
144; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
145; CHECK-THUMBv7-NEXT:    rev r0, r0
146; CHECK-THUMBv7-NEXT:    bx lr
147
148  %tmp1 = load i8, ptr %arg, align 4
149  %tmp2 = zext i8 %tmp1 to i32
150  %tmp3 = shl nuw nsw i32 %tmp2, 24
151  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
152  %tmp5 = load i8, ptr %tmp4, align 1
153  %tmp6 = zext i8 %tmp5 to i32
154  %tmp7 = shl nuw nsw i32 %tmp6, 16
155  %tmp8 = or i32 %tmp7, %tmp3
156  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
157  %tmp10 = load i8, ptr %tmp9, align 1
158  %tmp11 = zext i8 %tmp10 to i32
159  %tmp12 = shl nuw nsw i32 %tmp11, 8
160  %tmp13 = or i32 %tmp8, %tmp12
161  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
162  %tmp15 = load i8, ptr %tmp14, align 1
163  %tmp16 = zext i8 %tmp15 to i32
164  %tmp17 = or i32 %tmp13, %tmp16
165  ret i32 %tmp17
166}
167
168; ptr p; // p is 8 byte aligned
169; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; Eight byte loads combined into a 64-bit value. The ARMv6 and Thumbv7 runs
; expect one ldrd; the plain ARM and Thumbv6 runs expect two ldr.
170define i64 @load_i64_by_i8(ptr %arg) {
171; CHECK-LABEL: load_i64_by_i8:
172; CHECK:       @ %bb.0:
173; CHECK-NEXT:    ldr r2, [r0]
174; CHECK-NEXT:    ldr r1, [r0, #4]
175; CHECK-NEXT:    mov r0, r2
176; CHECK-NEXT:    mov pc, lr
177;
178; CHECK-ARMv6-LABEL: load_i64_by_i8:
179; CHECK-ARMv6:       @ %bb.0:
180; CHECK-ARMv6-NEXT:    ldrd r0, r1, [r0]
181; CHECK-ARMv6-NEXT:    bx lr
182;
183; CHECK-THUMBv6-LABEL: load_i64_by_i8:
184; CHECK-THUMBv6:       @ %bb.0:
185; CHECK-THUMBv6-NEXT:    ldr r2, [r0]
186; CHECK-THUMBv6-NEXT:    ldr r1, [r0, #4]
187; CHECK-THUMBv6-NEXT:    mov r0, r2
188; CHECK-THUMBv6-NEXT:    bx lr
189;
190; CHECK-THUMBv7-LABEL: load_i64_by_i8:
191; CHECK-THUMBv7:       @ %bb.0:
192; CHECK-THUMBv7-NEXT:    ldrd r0, r1, [r0]
193; CHECK-THUMBv7-NEXT:    bx lr
194
195  %tmp1 = load i8, ptr %arg, align 8
196  %tmp2 = zext i8 %tmp1 to i64
197  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
198  %tmp4 = load i8, ptr %tmp3, align 1
199  %tmp5 = zext i8 %tmp4 to i64
200  %tmp6 = shl nuw nsw i64 %tmp5, 8
201  %tmp7 = or i64 %tmp6, %tmp2
202  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
203  %tmp9 = load i8, ptr %tmp8, align 1
204  %tmp10 = zext i8 %tmp9 to i64
205  %tmp11 = shl nuw nsw i64 %tmp10, 16
206  %tmp12 = or i64 %tmp7, %tmp11
207  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
208  %tmp14 = load i8, ptr %tmp13, align 1
209  %tmp15 = zext i8 %tmp14 to i64
210  %tmp16 = shl nuw nsw i64 %tmp15, 24
211  %tmp17 = or i64 %tmp12, %tmp16
212  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
213  %tmp19 = load i8, ptr %tmp18, align 1
214  %tmp20 = zext i8 %tmp19 to i64
215  %tmp21 = shl nuw nsw i64 %tmp20, 32
216  %tmp22 = or i64 %tmp17, %tmp21
217  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
218  %tmp24 = load i8, ptr %tmp23, align 1
219  %tmp25 = zext i8 %tmp24 to i64
220  %tmp26 = shl nuw nsw i64 %tmp25, 40
221  %tmp27 = or i64 %tmp22, %tmp26
222  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
223  %tmp29 = load i8, ptr %tmp28, align 1
224  %tmp30 = zext i8 %tmp29 to i64
225  %tmp31 = shl nuw nsw i64 %tmp30, 48
226  %tmp32 = or i64 %tmp27, %tmp31
227  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
228  %tmp34 = load i8, ptr %tmp33, align 1
229  %tmp35 = zext i8 %tmp34 to i64
230  %tmp36 = shl nuw i64 %tmp35, 56
231  %tmp37 = or i64 %tmp32, %tmp36
232  ret i64 %tmp37
233}
234
235; ptr p; // p is 8 byte aligned
236; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; Byte-reversed 64-bit load. Each run loads the two 32-bit words and reverses
; each one, with the word at offset 4 ending up in r0 and the word at offset 0
; in r1. The plain ARM run expects the reversal expanded into and/orr/lsl; the
; other runs expect rev (after ldrd on ARMv6/Thumbv7, after two ldr on Thumbv6).
237define i64 @load_i64_by_i8_bswap(ptr %arg) {
238; CHECK-LABEL: load_i64_by_i8_bswap:
239; CHECK:       @ %bb.0:
240; CHECK-NEXT:    ldr r1, [r0]
241; CHECK-NEXT:    mov r12, #65280
242; CHECK-NEXT:    ldr r0, [r0, #4]
243; CHECK-NEXT:    and r2, r0, #65280
244; CHECK-NEXT:    and r3, r12, r0, lsr #8
245; CHECK-NEXT:    orr r3, r3, r0, lsr #24
246; CHECK-NEXT:    lsl r0, r0, #24
247; CHECK-NEXT:    orr r0, r0, r2, lsl #8
248; CHECK-NEXT:    and r2, r12, r1, lsr #8
249; CHECK-NEXT:    orr r0, r0, r3
250; CHECK-NEXT:    and r3, r1, #65280
251; CHECK-NEXT:    orr r2, r2, r1, lsr #24
252; CHECK-NEXT:    lsl r1, r1, #24
253; CHECK-NEXT:    orr r1, r1, r3, lsl #8
254; CHECK-NEXT:    orr r1, r1, r2
255; CHECK-NEXT:    mov pc, lr
256;
257; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
258; CHECK-ARMv6:       @ %bb.0:
259; CHECK-ARMv6-NEXT:    ldrd r2, r3, [r0]
260; CHECK-ARMv6-NEXT:    rev r0, r3
261; CHECK-ARMv6-NEXT:    rev r1, r2
262; CHECK-ARMv6-NEXT:    bx lr
263;
264; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap:
265; CHECK-THUMBv6:       @ %bb.0:
266; CHECK-THUMBv6-NEXT:    ldr r1, [r0]
267; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #4]
268; CHECK-THUMBv6-NEXT:    rev r0, r0
269; CHECK-THUMBv6-NEXT:    rev r1, r1
270; CHECK-THUMBv6-NEXT:    bx lr
271;
272; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap:
273; CHECK-THUMBv7:       @ %bb.0:
274; CHECK-THUMBv7-NEXT:    ldrd r1, r0, [r0]
275; CHECK-THUMBv7-NEXT:    rev r0, r0
276; CHECK-THUMBv7-NEXT:    rev r1, r1
277; CHECK-THUMBv7-NEXT:    bx lr
278
279  %tmp1 = load i8, ptr %arg, align 8
280  %tmp2 = zext i8 %tmp1 to i64
281  %tmp3 = shl nuw i64 %tmp2, 56
282  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
283  %tmp5 = load i8, ptr %tmp4, align 1
284  %tmp6 = zext i8 %tmp5 to i64
285  %tmp7 = shl nuw nsw i64 %tmp6, 48
286  %tmp8 = or i64 %tmp7, %tmp3
287  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
288  %tmp10 = load i8, ptr %tmp9, align 1
289  %tmp11 = zext i8 %tmp10 to i64
290  %tmp12 = shl nuw nsw i64 %tmp11, 40
291  %tmp13 = or i64 %tmp8, %tmp12
292  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
293  %tmp15 = load i8, ptr %tmp14, align 1
294  %tmp16 = zext i8 %tmp15 to i64
295  %tmp17 = shl nuw nsw i64 %tmp16, 32
296  %tmp18 = or i64 %tmp13, %tmp17
297  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
298  %tmp20 = load i8, ptr %tmp19, align 1
299  %tmp21 = zext i8 %tmp20 to i64
300  %tmp22 = shl nuw nsw i64 %tmp21, 24
301  %tmp23 = or i64 %tmp18, %tmp22
302  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
303  %tmp25 = load i8, ptr %tmp24, align 1
304  %tmp26 = zext i8 %tmp25 to i64
305  %tmp27 = shl nuw nsw i64 %tmp26, 16
306  %tmp28 = or i64 %tmp23, %tmp27
307  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
308  %tmp30 = load i8, ptr %tmp29, align 1
309  %tmp31 = zext i8 %tmp30 to i64
310  %tmp32 = shl nuw nsw i64 %tmp31, 8
311  %tmp33 = or i64 %tmp28, %tmp32
312  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
313  %tmp35 = load i8, ptr %tmp34, align 1
314  %tmp36 = zext i8 %tmp35 to i64
315  %tmp37 = or i64 %tmp33, %tmp36
316  ret i64 %tmp37
317}
318
319; ptr p; // p[1] is 4 byte aligned
320; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
; The combined load starts at p + 1. Plain ARM and ARMv6 expect ldr with a #1
; immediate, Thumbv7 expects ldr.w with #1, and Thumbv6 expects the offset to
; be materialized in r1 and used as a register offset.
321define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
322; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
323; CHECK:       @ %bb.0:
324; CHECK-NEXT:    ldr r0, [r0, #1]
325; CHECK-NEXT:    mov pc, lr
326;
327; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
328; CHECK-ARMv6:       @ %bb.0:
329; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
330; CHECK-ARMv6-NEXT:    bx lr
331;
332; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset:
333; CHECK-THUMBv6:       @ %bb.0:
334; CHECK-THUMBv6-NEXT:    movs r1, #1
335; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
336; CHECK-THUMBv6-NEXT:    bx lr
337;
338; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset:
339; CHECK-THUMBv7:       @ %bb.0:
340; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #1]
341; CHECK-THUMBv7-NEXT:    bx lr
342
343
344  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
345  %tmp2 = load i8, ptr %tmp1, align 4
346  %tmp3 = zext i8 %tmp2 to i32
347  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
348  %tmp5 = load i8, ptr %tmp4, align 1
349  %tmp6 = zext i8 %tmp5 to i32
350  %tmp7 = shl nuw nsw i32 %tmp6, 8
351  %tmp8 = or i32 %tmp7, %tmp3
352  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
353  %tmp10 = load i8, ptr %tmp9, align 1
354  %tmp11 = zext i8 %tmp10 to i32
355  %tmp12 = shl nuw nsw i32 %tmp11, 16
356  %tmp13 = or i32 %tmp8, %tmp12
357  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
358  %tmp15 = load i8, ptr %tmp14, align 1
359  %tmp16 = zext i8 %tmp15 to i32
360  %tmp17 = shl nuw nsw i32 %tmp16, 24
361  %tmp18 = or i32 %tmp13, %tmp17
362  ret i32 %tmp18
363}
364
365; ptr p; // p[-4] is 4 byte aligned
366; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
; The combined load starts at p - 4. Plain ARM, ARMv6 and Thumbv7 expect ldr
; with a #-4 immediate; Thumbv6 expects the base to be adjusted with subs first.
367define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
368; CHECK-LABEL: load_i32_by_i8_neg_offset:
369; CHECK:       @ %bb.0:
370; CHECK-NEXT:    ldr r0, [r0, #-4]
371; CHECK-NEXT:    mov pc, lr
372;
373; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
374; CHECK-ARMv6:       @ %bb.0:
375; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
376; CHECK-ARMv6-NEXT:    bx lr
377;
378; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset:
379; CHECK-THUMBv6:       @ %bb.0:
380; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
381; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
382; CHECK-THUMBv6-NEXT:    bx lr
383;
384; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset:
385; CHECK-THUMBv7:       @ %bb.0:
386; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #-4]
387; CHECK-THUMBv7-NEXT:    bx lr
388
389
390  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
391  %tmp2 = load i8, ptr %tmp1, align 4
392  %tmp3 = zext i8 %tmp2 to i32
393  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
394  %tmp5 = load i8, ptr %tmp4, align 1
395  %tmp6 = zext i8 %tmp5 to i32
396  %tmp7 = shl nuw nsw i32 %tmp6, 8
397  %tmp8 = or i32 %tmp7, %tmp3
398  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
399  %tmp10 = load i8, ptr %tmp9, align 1
400  %tmp11 = zext i8 %tmp10 to i32
401  %tmp12 = shl nuw nsw i32 %tmp11, 16
402  %tmp13 = or i32 %tmp8, %tmp12
403  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
404  %tmp15 = load i8, ptr %tmp14, align 1
405  %tmp16 = zext i8 %tmp15 to i32
406  %tmp17 = shl nuw nsw i32 %tmp16, 24
407  %tmp18 = or i32 %tmp13, %tmp17
408  ret i32 %tmp18
409}
410
411; ptr p; // p[1] is 4 byte aligned
412; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Byte-reversed variant of the offset-1 load: every run expects one 32-bit
; load from p + 1 followed by a byte swap (rev on ARMv6/Thumbv6/Thumbv7, an
; expanded and/orr/lsl sequence on plain ARM).
413define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
414; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
415; CHECK:       @ %bb.0:
416; CHECK-NEXT:    ldr r0, [r0, #1]
417; CHECK-NEXT:    mov r1, #65280
418; CHECK-NEXT:    and r2, r0, #65280
419; CHECK-NEXT:    and r1, r1, r0, lsr #8
420; CHECK-NEXT:    orr r1, r1, r0, lsr #24
421; CHECK-NEXT:    lsl r0, r0, #24
422; CHECK-NEXT:    orr r0, r0, r2, lsl #8
423; CHECK-NEXT:    orr r0, r0, r1
424; CHECK-NEXT:    mov pc, lr
425;
426; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
427; CHECK-ARMv6:       @ %bb.0:
428; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
429; CHECK-ARMv6-NEXT:    rev r0, r0
430; CHECK-ARMv6-NEXT:    bx lr
431;
432; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
433; CHECK-THUMBv6:       @ %bb.0:
434; CHECK-THUMBv6-NEXT:    movs r1, #1
435; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
436; CHECK-THUMBv6-NEXT:    rev r0, r0
437; CHECK-THUMBv6-NEXT:    bx lr
438;
439; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap:
440; CHECK-THUMBv7:       @ %bb.0:
441; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #1]
442; CHECK-THUMBv7-NEXT:    rev r0, r0
443; CHECK-THUMBv7-NEXT:    bx lr
444
445
446  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
447  %tmp2 = load i8, ptr %tmp1, align 1
448  %tmp3 = zext i8 %tmp2 to i32
449  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
450  %tmp5 = load i8, ptr %tmp4, align 1
451  %tmp6 = zext i8 %tmp5 to i32
452  %tmp7 = shl nuw nsw i32 %tmp6, 8
453  %tmp8 = or i32 %tmp7, %tmp3
454  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
455  %tmp10 = load i8, ptr %tmp9, align 1
456  %tmp11 = zext i8 %tmp10 to i32
457  %tmp12 = shl nuw nsw i32 %tmp11, 16
458  %tmp13 = or i32 %tmp8, %tmp12
459  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
460  %tmp15 = load i8, ptr %tmp14, align 4
461  %tmp16 = zext i8 %tmp15 to i32
462  %tmp17 = shl nuw nsw i32 %tmp16, 24
463  %tmp18 = or i32 %tmp13, %tmp17
464  ret i32 %tmp18
465}
466
467; ptr p; // p[-4] is 4 byte aligned
468; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
; Byte-reversed variant of the offset -4 load: every run expects one 32-bit
; load from p - 4 followed by a byte swap (rev on ARMv6/Thumbv6/Thumbv7, an
; expanded and/orr/lsl sequence on plain ARM).
469define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
470; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
471; CHECK:       @ %bb.0:
472; CHECK-NEXT:    ldr r0, [r0, #-4]
473; CHECK-NEXT:    mov r1, #65280
474; CHECK-NEXT:    and r2, r0, #65280
475; CHECK-NEXT:    and r1, r1, r0, lsr #8
476; CHECK-NEXT:    orr r1, r1, r0, lsr #24
477; CHECK-NEXT:    lsl r0, r0, #24
478; CHECK-NEXT:    orr r0, r0, r2, lsl #8
479; CHECK-NEXT:    orr r0, r0, r1
480; CHECK-NEXT:    mov pc, lr
481;
482; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
483; CHECK-ARMv6:       @ %bb.0:
484; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
485; CHECK-ARMv6-NEXT:    rev r0, r0
486; CHECK-ARMv6-NEXT:    bx lr
487;
488; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap:
489; CHECK-THUMBv6:       @ %bb.0:
490; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
491; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
492; CHECK-THUMBv6-NEXT:    rev r0, r0
493; CHECK-THUMBv6-NEXT:    bx lr
494;
495; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap:
496; CHECK-THUMBv7:       @ %bb.0:
497; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #-4]
498; CHECK-THUMBv7-NEXT:    rev r0, r0
499; CHECK-THUMBv7-NEXT:    bx lr
500
501
502  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
503  %tmp2 = load i8, ptr %tmp1, align 1
504  %tmp3 = zext i8 %tmp2 to i32
505  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
506  %tmp5 = load i8, ptr %tmp4, align 1
507  %tmp6 = zext i8 %tmp5 to i32
508  %tmp7 = shl nuw nsw i32 %tmp6, 8
509  %tmp8 = or i32 %tmp7, %tmp3
510  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
511  %tmp10 = load i8, ptr %tmp9, align 1
512  %tmp11 = zext i8 %tmp10 to i32
513  %tmp12 = shl nuw nsw i32 %tmp11, 16
514  %tmp13 = or i32 %tmp8, %tmp12
515  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
516  %tmp15 = load i8, ptr %tmp14, align 4
517  %tmp16 = zext i8 %tmp15 to i32
518  %tmp17 = shl nuw nsw i32 %tmp16, 24
519  %tmp18 = or i32 %tmp13, %tmp17
520  ret i32 %tmp18
521}
522
523declare i16 @llvm.bswap.i16(i16)
524
525; ptr p; // p is 4 byte aligned, elements are i16
526; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
; Two i16 loads, each byte-swapped and zero-extended before being combined.
; Every run expects one 32-bit ldr plus a full 32-bit byte swap (rev on
; ARMv6/Thumbv6/Thumbv7, an expanded and/orr/lsl sequence on plain ARM).
527define i32 @load_i32_by_bswap_i16(ptr %arg) {
528; CHECK-LABEL: load_i32_by_bswap_i16:
529; CHECK:       @ %bb.0:
530; CHECK-NEXT:    ldr r0, [r0]
531; CHECK-NEXT:    mov r1, #65280
532; CHECK-NEXT:    and r2, r0, #65280
533; CHECK-NEXT:    and r1, r1, r0, lsr #8
534; CHECK-NEXT:    orr r1, r1, r0, lsr #24
535; CHECK-NEXT:    lsl r0, r0, #24
536; CHECK-NEXT:    orr r0, r0, r2, lsl #8
537; CHECK-NEXT:    orr r0, r0, r1
538; CHECK-NEXT:    mov pc, lr
539;
540; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
541; CHECK-ARMv6:       @ %bb.0:
542; CHECK-ARMv6-NEXT:    ldr r0, [r0]
543; CHECK-ARMv6-NEXT:    rev r0, r0
544; CHECK-ARMv6-NEXT:    bx lr
545;
546; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16:
547; CHECK-THUMBv6:       @ %bb.0:
548; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
549; CHECK-THUMBv6-NEXT:    rev r0, r0
550; CHECK-THUMBv6-NEXT:    bx lr
551;
552; CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16:
553; CHECK-THUMBv7:       @ %bb.0:
554; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
555; CHECK-THUMBv7-NEXT:    rev r0, r0
556; CHECK-THUMBv7-NEXT:    bx lr
557
558
559  %tmp1 = load i16, ptr %arg, align 4
560  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
561  %tmp2 = zext i16 %tmp11 to i32
562  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
563  %tmp4 = load i16, ptr %tmp3, align 1
564  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
565  %tmp5 = zext i16 %tmp41 to i32
566  %tmp6 = shl nuw nsw i32 %tmp2, 16
567  %tmp7 = or i32 %tmp6, %tmp5
568  ret i32 %tmp7
569}
570
571; ptr p; // p is 4 byte aligned, elements are i16
572; (i32) p[0] | ((sext p[1] to i32) << 16)
; The high half is sign-extended rather than zero-extended, but the shl by 16
; shifts the extension bits out, so all four runs still expect a single ldr.
573define i32 @load_i32_by_sext_i16(ptr %arg) {
574; CHECK-LABEL: load_i32_by_sext_i16:
575; CHECK:       @ %bb.0:
576; CHECK-NEXT:    ldr r0, [r0]
577; CHECK-NEXT:    mov pc, lr
578;
579; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
580; CHECK-ARMv6:       @ %bb.0:
581; CHECK-ARMv6-NEXT:    ldr r0, [r0]
582; CHECK-ARMv6-NEXT:    bx lr
583;
584; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16:
585; CHECK-THUMBv6:       @ %bb.0:
586; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
587; CHECK-THUMBv6-NEXT:    bx lr
588;
589; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16:
590; CHECK-THUMBv7:       @ %bb.0:
591; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
592; CHECK-THUMBv7-NEXT:    bx lr
593  %tmp1 = load i16, ptr %arg, align 4
594  %tmp2 = zext i16 %tmp1 to i32
595  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
596  %tmp4 = load i16, ptr %tmp3, align 1
597  %tmp5 = sext i16 %tmp4 to i32
598  %tmp6 = shl nuw nsw i32 %tmp5, 16
599  %tmp7 = or i32 %tmp6, %tmp2
600  ret i32 %tmp7
601}
602
603; ptr arg; i32 i;
604; p = arg + 12;
605; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
; Byte addresses are arg + 12 + i + k for k = 0..3. Every run expects the base
; and index to be added first, followed by one ldr with a #12 immediate offset.
606define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
607; CHECK-LABEL: load_i32_by_i8_base_offset_index:
608; CHECK:       @ %bb.0:
609; CHECK-NEXT:    add r0, r0, r1
610; CHECK-NEXT:    ldr r0, [r0, #12]
611; CHECK-NEXT:    mov pc, lr
612;
613; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
614; CHECK-ARMv6:       @ %bb.0:
615; CHECK-ARMv6-NEXT:    add r0, r0, r1
616; CHECK-ARMv6-NEXT:    ldr r0, [r0, #12]
617; CHECK-ARMv6-NEXT:    bx lr
618;
619; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index:
620; CHECK-THUMBv6:       @ %bb.0:
621; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
622; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #12]
623; CHECK-THUMBv6-NEXT:    bx lr
624;
625; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index:
626; CHECK-THUMBv7:       @ %bb.0:
627; CHECK-THUMBv7-NEXT:    add r0, r1
628; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #12]
629; CHECK-THUMBv7-NEXT:    bx lr
630
631  %tmp = add nuw nsw i32 %i, 3
632  %tmp2 = add nuw nsw i32 %i, 2
633  %tmp3 = add nuw nsw i32 %i, 1
634  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
635  %tmp5 = zext i32 %i to i64
636  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
637  %tmp7 = load i8, ptr %tmp6, align 4
638  %tmp8 = zext i8 %tmp7 to i32
639  %tmp9 = zext i32 %tmp3 to i64
640  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
641  %tmp11 = load i8, ptr %tmp10, align 1
642  %tmp12 = zext i8 %tmp11 to i32
643  %tmp13 = shl nuw nsw i32 %tmp12, 8
644  %tmp14 = or i32 %tmp13, %tmp8
645  %tmp15 = zext i32 %tmp2 to i64
646  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
647  %tmp17 = load i8, ptr %tmp16, align 1
648  %tmp18 = zext i8 %tmp17 to i32
649  %tmp19 = shl nuw nsw i32 %tmp18, 16
650  %tmp20 = or i32 %tmp14, %tmp19
651  %tmp21 = zext i32 %tmp to i64
652  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
653  %tmp23 = load i8, ptr %tmp22, align 1
654  %tmp24 = zext i8 %tmp23 to i32
655  %tmp25 = shl nuw i32 %tmp24, 24
656  %tmp26 = or i32 %tmp20, %tmp25
657  ret i32 %tmp26
658}
659
660; ptr arg; i32 i;
661; p = arg + 12;
662; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
; Same pattern as load_i32_by_i8_base_offset_index but starting at i + 1, so
; the expected constant offset is 13: an immediate on plain ARM and ARMv6,
; ldr.w on Thumbv7, and a value materialized in r1 on Thumbv6.
663define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
664; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
665; CHECK:       @ %bb.0:
666; CHECK-NEXT:    add r0, r1, r0
667; CHECK-NEXT:    ldr r0, [r0, #13]
668; CHECK-NEXT:    mov pc, lr
669;
670; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
671; CHECK-ARMv6:       @ %bb.0:
672; CHECK-ARMv6-NEXT:    add r0, r1, r0
673; CHECK-ARMv6-NEXT:    ldr r0, [r0, #13]
674; CHECK-ARMv6-NEXT:    bx lr
675;
676; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2:
677; CHECK-THUMBv6:       @ %bb.0:
678; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
679; CHECK-THUMBv6-NEXT:    movs r1, #13
680; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
681; CHECK-THUMBv6-NEXT:    bx lr
682;
683; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2:
684; CHECK-THUMBv7:       @ %bb.0:
685; CHECK-THUMBv7-NEXT:    add r0, r1
686; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #13]
687; CHECK-THUMBv7-NEXT:    bx lr
688  %tmp = add nuw nsw i32 %i, 4
689  %tmp2 = add nuw nsw i32 %i, 3
690  %tmp3 = add nuw nsw i32 %i, 2
691  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
692  %tmp5 = add nuw nsw i32 %i, 1
693  %tmp27 = zext i32 %tmp5 to i64
694  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
695  %tmp29 = load i8, ptr %tmp28, align 4
696  %tmp30 = zext i8 %tmp29 to i32
697  %tmp31 = zext i32 %tmp3 to i64
698  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
699  %tmp33 = load i8, ptr %tmp32, align 1
700  %tmp34 = zext i8 %tmp33 to i32
701  %tmp35 = shl nuw nsw i32 %tmp34, 8
702  %tmp36 = or i32 %tmp35, %tmp30
703  %tmp37 = zext i32 %tmp2 to i64
704  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
705  %tmp39 = load i8, ptr %tmp38, align 1
706  %tmp40 = zext i8 %tmp39 to i32
707  %tmp41 = shl nuw nsw i32 %tmp40, 16
708  %tmp42 = or i32 %tmp36, %tmp41
709  %tmp43 = zext i32 %tmp to i64
710  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
711  %tmp45 = load i8, ptr %tmp44, align 1
712  %tmp46 = zext i8 %tmp45 to i32
713  %tmp47 = shl nuw i32 %tmp46, 24
714  %tmp48 = or i32 %tmp42, %tmp47
715  ret i32 %tmp48
716}
717
718; ptr p; // p is 2 byte aligned
719; (i32) p[0] | ((i32) p[1] << 8)
; Only the low two bytes of the i32 result are populated; every run expects
; the pair of byte loads to fold into one ldrh.
720define i32 @zext_load_i32_by_i8(ptr %arg) {
721; CHECK-LABEL: zext_load_i32_by_i8:
722; CHECK:       @ %bb.0:
723; CHECK-NEXT:    ldrh r0, [r0]
724; CHECK-NEXT:    mov pc, lr
725;
726; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
727; CHECK-ARMv6:       @ %bb.0:
728; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
729; CHECK-ARMv6-NEXT:    bx lr
730;
731; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
732; CHECK-THUMBv6:       @ %bb.0:
733; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
734; CHECK-THUMBv6-NEXT:    bx lr
735;
736; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
737; CHECK-THUMBv7:       @ %bb.0:
738; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
739; CHECK-THUMBv7-NEXT:    bx lr
740
741  %tmp2 = load i8, ptr %arg, align 2
742  %tmp3 = zext i8 %tmp2 to i32
743  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
744  %tmp5 = load i8, ptr %tmp4, align 1
745  %tmp6 = zext i8 %tmp5 to i32
746  %tmp7 = shl nuw nsw i32 %tmp6, 8
747  %tmp8 = or i32 %tmp7, %tmp3
748  ret i32 %tmp8
749}
750
751; ptr p; // p is 2 byte aligned
752; ((i32) p[0] << 8) | ((i32) p[1] << 16)
; The two bytes are placed at bits 8 and 16 rather than 0 and 8. No run
; expects the loads to be combined here: each keeps two ldrb plus shifts.
753define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
754; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
755; CHECK:       @ %bb.0:
756; CHECK-NEXT:    ldrb r1, [r0]
757; CHECK-NEXT:    ldrb r0, [r0, #1]
758; CHECK-NEXT:    lsl r0, r0, #16
759; CHECK-NEXT:    orr r0, r0, r1, lsl #8
760; CHECK-NEXT:    mov pc, lr
761;
762; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
763; CHECK-ARMv6:       @ %bb.0:
764; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
765; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
766; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
767; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
768; CHECK-ARMv6-NEXT:    bx lr
769;
770; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8:
771; CHECK-THUMBv6:       @ %bb.0:
772; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
773; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
774; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
775; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
776; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
777; CHECK-THUMBv6-NEXT:    bx lr
778;
779; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8:
780; CHECK-THUMBv7:       @ %bb.0:
781; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
782; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
783; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
784; CHECK-THUMBv7-NEXT:    orr.w r0, r0, r1, lsl #8
785; CHECK-THUMBv7-NEXT:    bx lr
786
787  %tmp2 = load i8, ptr %arg, align 2
788  %tmp3 = zext i8 %tmp2 to i32
789  %tmp30 = shl nuw nsw i32 %tmp3, 8
790  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
791  %tmp5 = load i8, ptr %tmp4, align 1
792  %tmp6 = zext i8 %tmp5 to i32
793  %tmp7 = shl nuw nsw i32 %tmp6, 16
794  %tmp8 = or i32 %tmp7, %tmp30
795  ret i32 %tmp8
796}
797
798; ptr p; // p is 2 byte aligned
799; ((i32) p[0] << 16) | ((i32) p[1] << 24)
; The two bytes are placed at bits 16 and 24. No run expects the loads to be
; combined here: each keeps two ldrb plus shifts.
800define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
801; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
802; CHECK:       @ %bb.0:
803; CHECK-NEXT:    ldrb r1, [r0]
804; CHECK-NEXT:    ldrb r0, [r0, #1]
805; CHECK-NEXT:    lsl r0, r0, #24
806; CHECK-NEXT:    orr r0, r0, r1, lsl #16
807; CHECK-NEXT:    mov pc, lr
808;
809; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
810; CHECK-ARMv6:       @ %bb.0:
811; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
812; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
813; CHECK-ARMv6-NEXT:    lsl r0, r0, #24
814; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #16
815; CHECK-ARMv6-NEXT:    bx lr
816;
817; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16:
818; CHECK-THUMBv6:       @ %bb.0:
819; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
820; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
821; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
822; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
823; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
824; CHECK-THUMBv6-NEXT:    bx lr
825;
826; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16:
827; CHECK-THUMBv7:       @ %bb.0:
828; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
829; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
830; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
831; CHECK-THUMBv7-NEXT:    orr.w r0, r0, r1, lsl #16
832; CHECK-THUMBv7-NEXT:    bx lr
833
834  %tmp2 = load i8, ptr %arg, align 2
835  %tmp3 = zext i8 %tmp2 to i32
836  %tmp30 = shl nuw nsw i32 %tmp3, 16
837  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
838  %tmp5 = load i8, ptr %tmp4, align 1
839  %tmp6 = zext i8 %tmp5 to i32
840  %tmp7 = shl nuw nsw i32 %tmp6, 24
841  %tmp8 = or i32 %tmp7, %tmp30
842  ret i32 %tmp8
843}
844
845; ptr p; // p is 2 byte aligned
846; (i32) p[1] | ((i32) p[0] << 8)
; Byte-reversed 16-bit value zero-extended to i32. The plain ARM run keeps two
; ldrb and one orr; the ARMv6, Thumbv6 and Thumbv7 runs expect ldrh + rev16.
847define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
848; CHECK-LABEL: zext_load_i32_by_i8_bswap:
849; CHECK:       @ %bb.0:
850; CHECK-NEXT:    ldrb r1, [r0]
851; CHECK-NEXT:    ldrb r0, [r0, #1]
852; CHECK-NEXT:    orr r0, r0, r1, lsl #8
853; CHECK-NEXT:    mov pc, lr
854;
855; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
856; CHECK-ARMv6:       @ %bb.0:
857; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
858; CHECK-ARMv6-NEXT:    rev16 r0, r0
859; CHECK-ARMv6-NEXT:    bx lr
860;
861; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
862; CHECK-THUMBv6:       @ %bb.0:
863; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
864; CHECK-THUMBv6-NEXT:    rev16 r0, r0
865; CHECK-THUMBv6-NEXT:    bx lr
866;
867; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
868; CHECK-THUMBv7:       @ %bb.0:
869; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
870; CHECK-THUMBv7-NEXT:    rev16 r0, r0
871; CHECK-THUMBv7-NEXT:    bx lr
872
873  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
874  %tmp2 = load i8, ptr %tmp1, align 1
875  %tmp3 = zext i8 %tmp2 to i32
876  %tmp5 = load i8, ptr %arg, align 2
877  %tmp6 = zext i8 %tmp5 to i32
878  %tmp7 = shl nuw nsw i32 %tmp6, 8
879  %tmp8 = or i32 %tmp7, %tmp3
880  ret i32 %tmp8
881}
882
883; ptr p; // p is 2 byte aligned
884; ((i32) p[1] << 8) | ((i32) p[0] << 16)
; Byte-reversed pair placed at bits 8 and 16. No run expects the loads to be
; combined here: each keeps two ldrb plus shifts.
885define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
886; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
887; CHECK:       @ %bb.0:
888; CHECK-NEXT:    ldrb r1, [r0]
889; CHECK-NEXT:    ldrb r0, [r0, #1]
890; CHECK-NEXT:    lsl r1, r1, #16
891; CHECK-NEXT:    orr r0, r1, r0, lsl #8
892; CHECK-NEXT:    mov pc, lr
893;
894; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
895; CHECK-ARMv6:       @ %bb.0:
896; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
897; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
898; CHECK-ARMv6-NEXT:    lsl r1, r1, #16
899; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #8
900; CHECK-ARMv6-NEXT:    bx lr
901;
902; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
903; CHECK-THUMBv6:       @ %bb.0:
904; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
905; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
906; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
907; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
908; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
909; CHECK-THUMBv6-NEXT:    bx lr
910;
911; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
912; CHECK-THUMBv7:       @ %bb.0:
913; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
914; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
915; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
916; CHECK-THUMBv7-NEXT:    orr.w r0, r1, r0, lsl #8
917; CHECK-THUMBv7-NEXT:    bx lr
918
919  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
920  %tmp2 = load i8, ptr %tmp1, align 1
921  %tmp3 = zext i8 %tmp2 to i32
922  %tmp30 = shl nuw nsw i32 %tmp3, 8
923  %tmp5 = load i8, ptr %arg, align 2
924  %tmp6 = zext i8 %tmp5 to i32
925  %tmp7 = shl nuw nsw i32 %tmp6, 16
926  %tmp8 = or i32 %tmp7, %tmp30
927  ret i32 %tmp8
928}
929
930; ptr p; // p is 2 byte aligned
931; ((i32) p[1] << 16) | ((i32) p[0] << 24)
; Byte-reversed pair placed at bits 16 and 24. No run expects the loads to be
; combined here: each keeps two ldrb plus shifts.
932define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
933; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
934; CHECK:       @ %bb.0:
935; CHECK-NEXT:    ldrb r1, [r0]
936; CHECK-NEXT:    ldrb r0, [r0, #1]
937; CHECK-NEXT:    lsl r1, r1, #24
938; CHECK-NEXT:    orr r0, r1, r0, lsl #16
939; CHECK-NEXT:    mov pc, lr
940;
941; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
942; CHECK-ARMv6:       @ %bb.0:
943; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
944; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
945; CHECK-ARMv6-NEXT:    lsl r1, r1, #24
946; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #16
947; CHECK-ARMv6-NEXT:    bx lr
948;
949; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
950; CHECK-THUMBv6:       @ %bb.0:
951; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
952; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
953; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
954; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
955; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
956; CHECK-THUMBv6-NEXT:    bx lr
957;
958; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
959; CHECK-THUMBv7:       @ %bb.0:
960; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
961; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
962; CHECK-THUMBv7-NEXT:    lsls r1, r1, #24
963; CHECK-THUMBv7-NEXT:    orr.w r0, r1, r0, lsl #16
964; CHECK-THUMBv7-NEXT:    bx lr
965
966  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
967  %tmp2 = load i8, ptr %tmp1, align 1
968  %tmp3 = zext i8 %tmp2 to i32
969  %tmp30 = shl nuw nsw i32 %tmp3, 16
970  %tmp5 = load i8, ptr %arg, align 2
971  %tmp6 = zext i8 %tmp5 to i32
972  %tmp7 = shl nuw nsw i32 %tmp6, 24
973  %tmp8 = or i32 %tmp7, %tmp30
974  ret i32 %tmp8
975}
976