xref: /llvm-project/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64_be-unknown | FileCheck %s
3
4; ptr p; // p is 4 byte aligned
5; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Builds an i32 from four single-byte loads: p[0] is shifted into bits 31:24,
; p[1] into 23:16, p[2] into 15:8 and p[3] stays in bits 7:0. The test is run
; with the aarch64_be triple, and the expected output below is one 32-bit ldr
; with no byte reversal.
6define i32 @load_i32_by_i8_big_endian(ptr %arg) {
7; CHECK-LABEL: load_i32_by_i8_big_endian:
8; CHECK:       // %bb.0:
9; CHECK-NEXT:    ldr w0, [x0]
10; CHECK-NEXT:    ret
  ; p[0] -> bits 31:24; this first load is the one marked align 4.
11  %tmp1 = load i8, ptr %arg, align 4
12  %tmp2 = zext i8 %tmp1 to i32
13  %tmp3 = shl nuw nsw i32 %tmp2, 24
  ; p[1] -> bits 23:16
14  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
15  %tmp5 = load i8, ptr %tmp4, align 1
16  %tmp6 = zext i8 %tmp5 to i32
17  %tmp7 = shl nuw nsw i32 %tmp6, 16
18  %tmp8 = or i32 %tmp7, %tmp3
  ; p[2] -> bits 15:8
19  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
20  %tmp10 = load i8, ptr %tmp9, align 1
21  %tmp11 = zext i8 %tmp10 to i32
22  %tmp12 = shl nuw nsw i32 %tmp11, 8
23  %tmp13 = or i32 %tmp8, %tmp12
  ; p[3] -> bits 7:0 (no shift)
24  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
25  %tmp15 = load i8, ptr %tmp14, align 1
26  %tmp16 = zext i8 %tmp15 to i32
27  %tmp17 = or i32 %tmp13, %tmp16
28  ret i32 %tmp17
29}
30
31; ptr p; // p is 4 byte aligned
32; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
; Two-level variant: bytes at offsets 0/1 and 2/3 are first combined into two
; i16 values (lower address in the high byte), then the two halves are widened
; and combined into an i32 with the first half in bits 31:16. The expected
; output below is still a single 32-bit ldr.
33define i32 @load_i32_by_i16_by_i8_big_endian(ptr %arg) {
34; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
35; CHECK:       // %bb.0:
36; CHECK-NEXT:    ldr w0, [x0]
37; CHECK-NEXT:    ret
  ; High half: (p[0] << 8) | p[1], computed in i16.
38  %tmp1 = load i8, ptr %arg, align 4
39  %tmp2 = zext i8 %tmp1 to i16
40  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
41  %tmp4 = load i8, ptr %tmp3, align 1
42  %tmp5 = zext i8 %tmp4 to i16
43  %tmp6 = shl nuw nsw i16 %tmp2, 8
44  %tmp7 = or i16 %tmp6, %tmp5
  ; Low half: (p[2] << 8) | p[3], computed in i16.
45  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
46  %tmp9 = load i8, ptr %tmp8, align 1
47  %tmp10 = zext i8 %tmp9 to i16
48  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
49  %tmp12 = load i8, ptr %tmp11, align 1
50  %tmp13 = zext i8 %tmp12 to i16
51  %tmp14 = shl nuw nsw i16 %tmp10, 8
52  %tmp15 = or i16 %tmp14, %tmp13
  ; Widen both halves and place the first one in bits 31:16.
53  %tmp16 = zext i16 %tmp7 to i32
54  %tmp17 = zext i16 %tmp15 to i32
55  %tmp18 = shl nuw nsw i32 %tmp16, 16
56  %tmp19 = or i32 %tmp18, %tmp17
57  ret i32 %tmp19
58}
59
60; ptr p; // p is 4 byte aligned
61; ((i32) p[0] << 16) | (i32) p[1]
; Builds an i32 from two i16 loads: element 0 goes to bits 31:16 and element 1
; to bits 15:0. The expected output below is a single 32-bit ldr.
62define i32 @load_i32_by_i16(ptr %arg) {
63; CHECK-LABEL: load_i32_by_i16:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    ldr w0, [x0]
66; CHECK-NEXT:    ret
67  %tmp1 = load i16, ptr %arg, align 4
68  %tmp2 = zext i16 %tmp1 to i32
  ; Second i16 element (byte offset 2); note this load is only marked align 1.
69  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
70  %tmp4 = load i16, ptr %tmp3, align 1
71  %tmp5 = zext i16 %tmp4 to i32
72  %tmp6 = shl nuw nsw i32 %tmp2, 16
73  %tmp7 = or i32 %tmp6, %tmp5
74  ret i32 %tmp7
75}
76
77; ptr p_16; // p_16 is 4 byte aligned
78; ptr p_8 = (ptr) p_16;
79; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
; Mixed-width variant: one i16 load supplies bits 31:16 and two i8 loads at
; byte offsets 2 and 3 supply bits 15:8 and 7:0. The expected output below is
; a single 32-bit ldr.
80define i32 @load_i32_by_i16_i8(ptr %arg) {
81; CHECK-LABEL: load_i32_by_i16_i8:
82; CHECK:       // %bb.0:
83; CHECK-NEXT:    ldr w0, [x0]
84; CHECK-NEXT:    ret
  ; i16 at offset 0 -> bits 31:16
85  %tmp2 = load i16, ptr %arg, align 4
86  %tmp3 = zext i16 %tmp2 to i32
87  %tmp4 = shl nuw nsw i32 %tmp3, 16
  ; i8 at offset 2 -> bits 15:8
88  %tmp5 = getelementptr inbounds i8, ptr %arg, i32 2
89  %tmp6 = load i8, ptr %tmp5, align 1
90  %tmp7 = zext i8 %tmp6 to i32
91  %tmp8 = shl nuw nsw i32 %tmp7, 8
  ; i8 at offset 3 -> bits 7:0
92  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
93  %tmp10 = load i8, ptr %tmp9, align 1
94  %tmp11 = zext i8 %tmp10 to i32
  ; The two byte lanes are merged first, then the i16 lane is OR-ed in.
95  %tmp12 = or i32 %tmp8, %tmp11
96  %tmp13 = or i32 %tmp12, %tmp4
97  ret i32 %tmp13
98}
99
100; ptr p; // p is 8 byte aligned
101; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; Builds an i64 from eight byte loads with p[0] in bits 7:0 and each following
; byte shifted 8 bits further left, ending with p[7] in bits 63:56. On the
; aarch64_be triple used by this test, the expected output below is a 64-bit
; ldr followed by a rev.
102define i64 @load_i64_by_i8_bswap(ptr %arg) {
103; CHECK-LABEL: load_i64_by_i8_bswap:
104; CHECK:       // %bb.0:
105; CHECK-NEXT:    ldr x8, [x0]
106; CHECK-NEXT:    rev x0, x8
107; CHECK-NEXT:    ret
  ; p[0] -> bits 7:0; this first load is the one marked align 8.
108  %tmp1 = load i8, ptr %arg, align 8
109  %tmp2 = zext i8 %tmp1 to i64
  ; p[1] -> bits 15:8
110  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
111  %tmp4 = load i8, ptr %tmp3, align 1
112  %tmp5 = zext i8 %tmp4 to i64
113  %tmp6 = shl nuw nsw i64 %tmp5, 8
114  %tmp7 = or i64 %tmp6, %tmp2
  ; p[2] -> bits 23:16
115  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
116  %tmp9 = load i8, ptr %tmp8, align 1
117  %tmp10 = zext i8 %tmp9 to i64
118  %tmp11 = shl nuw nsw i64 %tmp10, 16
119  %tmp12 = or i64 %tmp7, %tmp11
  ; p[3] -> bits 31:24
120  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
121  %tmp14 = load i8, ptr %tmp13, align 1
122  %tmp15 = zext i8 %tmp14 to i64
123  %tmp16 = shl nuw nsw i64 %tmp15, 24
124  %tmp17 = or i64 %tmp12, %tmp16
  ; p[4] -> bits 39:32
125  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
126  %tmp19 = load i8, ptr %tmp18, align 1
127  %tmp20 = zext i8 %tmp19 to i64
128  %tmp21 = shl nuw nsw i64 %tmp20, 32
129  %tmp22 = or i64 %tmp17, %tmp21
  ; p[5] -> bits 47:40
130  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
131  %tmp24 = load i8, ptr %tmp23, align 1
132  %tmp25 = zext i8 %tmp24 to i64
133  %tmp26 = shl nuw nsw i64 %tmp25, 40
134  %tmp27 = or i64 %tmp22, %tmp26
  ; p[6] -> bits 55:48
135  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
136  %tmp29 = load i8, ptr %tmp28, align 1
137  %tmp30 = zext i8 %tmp29 to i64
138  %tmp31 = shl nuw nsw i64 %tmp30, 48
139  %tmp32 = or i64 %tmp27, %tmp31
  ; p[7] -> bits 63:56; this shift carries only nuw because the byte can reach
  ; the sign bit.
140  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
141  %tmp34 = load i8, ptr %tmp33, align 1
142  %tmp35 = zext i8 %tmp34 to i64
143  %tmp36 = shl nuw i64 %tmp35, 56
144  %tmp37 = or i64 %tmp32, %tmp36
145  ret i64 %tmp37
146}
147
148; ptr p; // p is 8 byte aligned
149; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; Builds an i64 from eight byte loads with p[0] in bits 63:56 and each
; following byte 8 bits lower, ending with p[7] in bits 7:0. On the aarch64_be
; triple used by this test, the expected output below is a single 64-bit ldr
; with no rev.
150define i64 @load_i64_by_i8(ptr %arg) {
151; CHECK-LABEL: load_i64_by_i8:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    ldr x0, [x0]
154; CHECK-NEXT:    ret
  ; p[0] -> bits 63:56; load marked align 8, shift carries only nuw.
155  %tmp1 = load i8, ptr %arg, align 8
156  %tmp2 = zext i8 %tmp1 to i64
157  %tmp3 = shl nuw i64 %tmp2, 56
  ; p[1] -> bits 55:48
158  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
159  %tmp5 = load i8, ptr %tmp4, align 1
160  %tmp6 = zext i8 %tmp5 to i64
161  %tmp7 = shl nuw nsw i64 %tmp6, 48
162  %tmp8 = or i64 %tmp7, %tmp3
  ; p[2] -> bits 47:40
163  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
164  %tmp10 = load i8, ptr %tmp9, align 1
165  %tmp11 = zext i8 %tmp10 to i64
166  %tmp12 = shl nuw nsw i64 %tmp11, 40
167  %tmp13 = or i64 %tmp8, %tmp12
  ; p[3] -> bits 39:32
168  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
169  %tmp15 = load i8, ptr %tmp14, align 1
170  %tmp16 = zext i8 %tmp15 to i64
171  %tmp17 = shl nuw nsw i64 %tmp16, 32
172  %tmp18 = or i64 %tmp13, %tmp17
  ; p[4] -> bits 31:24
173  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
174  %tmp20 = load i8, ptr %tmp19, align 1
175  %tmp21 = zext i8 %tmp20 to i64
176  %tmp22 = shl nuw nsw i64 %tmp21, 24
177  %tmp23 = or i64 %tmp18, %tmp22
  ; p[5] -> bits 23:16
178  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
179  %tmp25 = load i8, ptr %tmp24, align 1
180  %tmp26 = zext i8 %tmp25 to i64
181  %tmp27 = shl nuw nsw i64 %tmp26, 16
182  %tmp28 = or i64 %tmp23, %tmp27
  ; p[6] -> bits 15:8
183  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
184  %tmp30 = load i8, ptr %tmp29, align 1
185  %tmp31 = zext i8 %tmp30 to i64
186  %tmp32 = shl nuw nsw i64 %tmp31, 8
187  %tmp33 = or i64 %tmp28, %tmp32
  ; p[7] -> bits 7:0 (no shift)
188  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
189  %tmp35 = load i8, ptr %tmp34, align 1
190  %tmp36 = zext i8 %tmp35 to i64
191  %tmp37 = or i64 %tmp33, %tmp36
192  ret i64 %tmp37
193}
194
195; ptr p; // p[1] is 4 byte aligned
196; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
; Same byte-by-byte i32 assembly as the other tests, but the four bytes start
; at byte offset 1 from %arg, with the lowest address in bits 7:0. The expected
; output below is an ldur at offset #1 followed by a rev.
197define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
198; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
199; CHECK:       // %bb.0:
200; CHECK-NEXT:    ldur w8, [x0, #1]
201; CHECK-NEXT:    rev w0, w8
202; CHECK-NEXT:    ret
  ; p[1] -> bits 7:0; this load is the one marked align 4.
203  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
204  %tmp2 = load i8, ptr %tmp1, align 4
205  %tmp3 = zext i8 %tmp2 to i32
  ; p[2] -> bits 15:8
206  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
207  %tmp5 = load i8, ptr %tmp4, align 1
208  %tmp6 = zext i8 %tmp5 to i32
209  %tmp7 = shl nuw nsw i32 %tmp6, 8
210  %tmp8 = or i32 %tmp7, %tmp3
  ; p[3] -> bits 23:16
211  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
212  %tmp10 = load i8, ptr %tmp9, align 1
213  %tmp11 = zext i8 %tmp10 to i32
214  %tmp12 = shl nuw nsw i32 %tmp11, 16
215  %tmp13 = or i32 %tmp8, %tmp12
  ; p[4] -> bits 31:24
216  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
217  %tmp15 = load i8, ptr %tmp14, align 1
218  %tmp16 = zext i8 %tmp15 to i32
219  %tmp17 = shl nuw nsw i32 %tmp16, 24
220  %tmp18 = or i32 %tmp13, %tmp17
221  ret i32 %tmp18
222}
223
224; ptr p; // p[-4] is 4 byte aligned
225; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
; Negative-offset variant: the four bytes live at offsets -4..-1 from %arg,
; with the lowest address (p[-4]) in bits 7:0. The expected output below is an
; ldur at offset #-4 followed by a rev.
226define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
227; CHECK-LABEL: load_i32_by_i8_neg_offset:
228; CHECK:       // %bb.0:
229; CHECK-NEXT:    ldur w8, [x0, #-4]
230; CHECK-NEXT:    rev w0, w8
231; CHECK-NEXT:    ret
  ; p[-4] -> bits 7:0; this load is the one marked align 4.
232  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
233  %tmp2 = load i8, ptr %tmp1, align 4
234  %tmp3 = zext i8 %tmp2 to i32
  ; p[-3] -> bits 15:8
235  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
236  %tmp5 = load i8, ptr %tmp4, align 1
237  %tmp6 = zext i8 %tmp5 to i32
238  %tmp7 = shl nuw nsw i32 %tmp6, 8
239  %tmp8 = or i32 %tmp7, %tmp3
  ; p[-2] -> bits 23:16
240  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
241  %tmp10 = load i8, ptr %tmp9, align 1
242  %tmp11 = zext i8 %tmp10 to i32
243  %tmp12 = shl nuw nsw i32 %tmp11, 16
244  %tmp13 = or i32 %tmp8, %tmp12
  ; p[-1] -> bits 31:24
245  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
246  %tmp15 = load i8, ptr %tmp14, align 1
247  %tmp16 = zext i8 %tmp15 to i32
248  %tmp17 = shl nuw nsw i32 %tmp16, 24
249  %tmp18 = or i32 %tmp13, %tmp17
250  ret i32 %tmp18
251}
252
253; ptr p; // p[1] is 4 byte aligned
254; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Bytes at offsets 1..4 from %arg, but here the highest address (p[4]) lands in
; bits 7:0 and p[1] in bits 31:24. The loads appear in descending address
; order. The expected output below is a single ldur at offset #1 with no rev.
255define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
256; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
257; CHECK:       // %bb.0:
258; CHECK-NEXT:    ldur w0, [x0, #1]
259; CHECK-NEXT:    ret
  ; p[4] -> bits 7:0
260  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
261  %tmp2 = load i8, ptr %tmp1, align 1
262  %tmp3 = zext i8 %tmp2 to i32
  ; p[3] -> bits 15:8
263  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
264  %tmp5 = load i8, ptr %tmp4, align 1
265  %tmp6 = zext i8 %tmp5 to i32
266  %tmp7 = shl nuw nsw i32 %tmp6, 8
267  %tmp8 = or i32 %tmp7, %tmp3
  ; p[2] -> bits 23:16
268  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
269  %tmp10 = load i8, ptr %tmp9, align 1
270  %tmp11 = zext i8 %tmp10 to i32
271  %tmp12 = shl nuw nsw i32 %tmp11, 16
272  %tmp13 = or i32 %tmp8, %tmp12
  ; p[1] -> bits 31:24; this load is the one marked align 4.
273  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
274  %tmp15 = load i8, ptr %tmp14, align 4
275  %tmp16 = zext i8 %tmp15 to i32
276  %tmp17 = shl nuw nsw i32 %tmp16, 24
277  %tmp18 = or i32 %tmp13, %tmp17
278  ret i32 %tmp18
279}
280
281; ptr p; // p[-4] is 4 byte aligned
282; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
; Bytes at offsets -4..-1 from %arg, with the highest address (p[-1]) in bits
; 7:0 and p[-4] in bits 31:24. The loads appear in descending address order.
; The expected output below is a single ldur at offset #-4 with no rev.
283define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
284; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
285; CHECK:       // %bb.0:
286; CHECK-NEXT:    ldur w0, [x0, #-4]
287; CHECK-NEXT:    ret
  ; p[-1] -> bits 7:0
288  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
289  %tmp2 = load i8, ptr %tmp1, align 1
290  %tmp3 = zext i8 %tmp2 to i32
  ; p[-2] -> bits 15:8
291  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
292  %tmp5 = load i8, ptr %tmp4, align 1
293  %tmp6 = zext i8 %tmp5 to i32
294  %tmp7 = shl nuw nsw i32 %tmp6, 8
295  %tmp8 = or i32 %tmp7, %tmp3
  ; p[-3] -> bits 23:16
296  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
297  %tmp10 = load i8, ptr %tmp9, align 1
298  %tmp11 = zext i8 %tmp10 to i32
299  %tmp12 = shl nuw nsw i32 %tmp11, 16
300  %tmp13 = or i32 %tmp8, %tmp12
  ; p[-4] -> bits 31:24; this load is the one marked align 4.
301  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
302  %tmp15 = load i8, ptr %tmp14, align 4
303  %tmp16 = zext i8 %tmp15 to i32
304  %tmp17 = shl nuw nsw i32 %tmp16, 24
305  %tmp18 = or i32 %tmp13, %tmp17
306  ret i32 %tmp18
307}
308
309declare i16 @llvm.bswap.i16(i16)
310
311; ptr p; // p is 4 byte aligned
312; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
; Two i16 loads, each passed through llvm.bswap.i16 before being widened.
; The swapped element 0 stays in bits 15:0 and the swapped element 1 is
; shifted into bits 31:16. The expected output below is a 32-bit ldr followed
; by a rev.
313define i32 @load_i32_by_bswap_i16(ptr %arg) {
314; CHECK-LABEL: load_i32_by_bswap_i16:
315; CHECK:       // %bb.0:
316; CHECK-NEXT:    ldr w8, [x0]
317; CHECK-NEXT:    rev w0, w8
318; CHECK-NEXT:    ret
  ; bswap(p[0]) -> bits 15:0
319  %tmp1 = load i16, ptr %arg, align 4
320  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
321  %tmp2 = zext i16 %tmp11 to i32
  ; bswap(p[1]) -> bits 31:16; the swap happens in i16, the shift in i32.
322  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
323  %tmp4 = load i16, ptr %tmp3, align 1
324  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
325  %tmp5 = zext i16 %tmp41 to i32
326  %tmp6 = shl nuw nsw i32 %tmp5, 16
327  %tmp7 = or i32 %tmp6, %tmp2
328  ret i32 %tmp7
329}
330
331; ptr p; // p is 4 byte aligned
332; (i32) p[1] | ((sext(p[0]) to i32) << 16)
; Like the plain two-i16 test, except that element 0 is sign-extended (not
; zero-extended) before being shifted into bits 31:16. Element 1 is
; zero-extended into bits 15:0. The expected output below is a single 32-bit
; ldr.
333define i32 @load_i32_by_sext_i16(ptr %arg) {
334; CHECK-LABEL: load_i32_by_sext_i16:
335; CHECK:       // %bb.0:
336; CHECK-NEXT:    ldr w0, [x0]
337; CHECK-NEXT:    ret
338  %tmp1 = load i16, ptr %arg, align 4
  ; sext here; the shift by 16 below moves the low 16 bits of this value into
  ; the high half of the result.
339  %tmp2 = sext i16 %tmp1 to i32
340  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
341  %tmp4 = load i16, ptr %tmp3, align 1
342  %tmp5 = zext i16 %tmp4 to i32
343  %tmp6 = shl nuw nsw i32 %tmp2, 16
344  %tmp7 = or i32 %tmp6, %tmp5
345  ret i32 %tmp7
346}
347
348; ptr arg; i32 i;
349; p = arg + 12;
350; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
; Byte-by-byte i32 assembly where the address is a constant base offset
; (%arg + 12) plus a variable index %i. Bytes at i, i+1, i+2 and i+3 go to bits
; 7:0, 15:8, 23:16 and 31:24. The expected output below adds the zero-extended
; index to the base, does one ldr at offset #12, then a rev.
351define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
352; CHECK-LABEL: load_i32_by_i8_base_offset_index:
353; CHECK:       // %bb.0:
354; CHECK-NEXT:    add x8, x0, w1, uxtw
355; CHECK-NEXT:    ldr w8, [x8, #12]
356; CHECK-NEXT:    rev w0, w8
357; CHECK-NEXT:    ret
  ; Indices i+3, i+2, i+1 are computed in i32 (nuw nsw) and zero-extended to
  ; i64 further down, right before each is used as a GEP index.
358  %tmp = add nuw nsw i32 %i, 3
359  %tmp2 = add nuw nsw i32 %i, 2
360  %tmp3 = add nuw nsw i32 %i, 1
  ; Constant part of the address: %arg + 12.
361  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  ; p[i] -> bits 7:0; this load is the one marked align 4.
362  %tmp5 = zext i32 %i to i64
363  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
364  %tmp7 = load i8, ptr %tmp6, align 4
365  %tmp8 = zext i8 %tmp7 to i32
  ; p[i + 1] -> bits 15:8
366  %tmp9 = zext i32 %tmp3 to i64
367  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
368  %tmp11 = load i8, ptr %tmp10, align 1
369  %tmp12 = zext i8 %tmp11 to i32
370  %tmp13 = shl nuw nsw i32 %tmp12, 8
371  %tmp14 = or i32 %tmp13, %tmp8
  ; p[i + 2] -> bits 23:16
372  %tmp15 = zext i32 %tmp2 to i64
373  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
374  %tmp17 = load i8, ptr %tmp16, align 1
375  %tmp18 = zext i8 %tmp17 to i32
376  %tmp19 = shl nuw nsw i32 %tmp18, 16
377  %tmp20 = or i32 %tmp14, %tmp19
  ; p[i + 3] -> bits 31:24 (this shift carries only nuw)
378  %tmp21 = zext i32 %tmp to i64
379  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
380  %tmp23 = load i8, ptr %tmp22, align 1
381  %tmp24 = zext i8 %tmp23 to i32
382  %tmp25 = shl nuw i32 %tmp24, 24
383  %tmp26 = or i32 %tmp20, %tmp25
384  ret i32 %tmp26
385}
386
387; ptr arg; i32 i;
388; p = arg + 12;
389; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
; Same base-plus-index pattern as the previous test, but the four bytes start
; at index i+1 instead of i, so every byte address includes an add on the
; index. Bytes at i+1..i+4 go to bits 7:0 through 31:24. The expected output
; below folds the constants into a single ldur at offset #13, then a rev.
390define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
391; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
392; CHECK:       // %bb.0:
393; CHECK-NEXT:    add x8, x0, w1, uxtw
394; CHECK-NEXT:    ldur w8, [x8, #13]
395; CHECK-NEXT:    rev w0, w8
396; CHECK-NEXT:    ret
  ; Indices i+4, i+3, i+2 (and i+1 below) are computed in i32 and
  ; zero-extended to i64 before use as GEP indices.
397  %tmp = add nuw nsw i32 %i, 4
398  %tmp2 = add nuw nsw i32 %i, 3
399  %tmp3 = add nuw nsw i32 %i, 2
  ; Constant part of the address: %arg + 12.
400  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  ; p[i + 1] -> bits 7:0; this load is the one marked align 4.
401  %tmp5 = add nuw nsw i32 %i, 1
402  %tmp27 = zext i32 %tmp5 to i64
403  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
404  %tmp29 = load i8, ptr %tmp28, align 4
405  %tmp30 = zext i8 %tmp29 to i32
  ; p[i + 2] -> bits 15:8
406  %tmp31 = zext i32 %tmp3 to i64
407  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
408  %tmp33 = load i8, ptr %tmp32, align 1
409  %tmp34 = zext i8 %tmp33 to i32
410  %tmp35 = shl nuw nsw i32 %tmp34, 8
411  %tmp36 = or i32 %tmp35, %tmp30
  ; p[i + 3] -> bits 23:16
412  %tmp37 = zext i32 %tmp2 to i64
413  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
414  %tmp39 = load i8, ptr %tmp38, align 1
415  %tmp40 = zext i8 %tmp39 to i32
416  %tmp41 = shl nuw nsw i32 %tmp40, 16
417  %tmp42 = or i32 %tmp36, %tmp41
  ; p[i + 4] -> bits 31:24 (this shift carries only nuw)
418  %tmp43 = zext i32 %tmp to i64
419  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
420  %tmp45 = load i8, ptr %tmp44, align 1
421  %tmp46 = zext i8 %tmp45 to i32
422  %tmp47 = shl nuw i32 %tmp46, 24
423  %tmp48 = or i32 %tmp42, %tmp47
424  ret i32 %tmp48
425}
426; ptr p; // p is 2 byte aligned
427; (i32) p[0] | ((i32) p[1] << 8)
; Only two bytes are loaded into an i32: p[0] in bits 7:0 and p[1] in bits
; 15:8, leaving the upper 16 bits zero. The expected output below is a
; halfword load (ldrh) followed by rev16.
428define i32 @zext_load_i32_by_i8(ptr %arg) {
429; CHECK-LABEL: zext_load_i32_by_i8:
430; CHECK:       // %bb.0:
431; CHECK-NEXT:    ldrh w8, [x0]
432; CHECK-NEXT:    rev16 w0, w8
433; CHECK-NEXT:    ret
  ; p[0] -> bits 7:0; this load is the one marked align 2.
434  %tmp2 = load i8, ptr %arg, align 2
435  %tmp3 = zext i8 %tmp2 to i32
  ; p[1] -> bits 15:8
436  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
437  %tmp5 = load i8, ptr %tmp4, align 1
438  %tmp6 = zext i8 %tmp5 to i32
439  %tmp7 = shl nuw nsw i32 %tmp6, 8
440  %tmp8 = or i32 %tmp7, %tmp3
441  ret i32 %tmp8
442}
443
444; ptr p; // p is 2 byte aligned
445; ((i32) p[0] << 8) | ((i32) p[1] << 16)
; Two-byte pattern placed one byte up: p[0] in bits 15:8 and p[1] in bits
; 23:16, so bits 7:0 of the result are zero. The expected output below keeps
; two separate ldrb loads combined with lsl/orr, i.e. the loads are not merged.
446define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
447; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
448; CHECK:       // %bb.0:
449; CHECK-NEXT:    ldrb w8, [x0]
450; CHECK-NEXT:    ldrb w9, [x0, #1]
451; CHECK-NEXT:    lsl w8, w8, #8
452; CHECK-NEXT:    orr w0, w8, w9, lsl #16
453; CHECK-NEXT:    ret
  ; p[0] -> bits 15:8
454  %tmp2 = load i8, ptr %arg, align 2
455  %tmp3 = zext i8 %tmp2 to i32
456  %tmp30 = shl nuw nsw i32 %tmp3, 8
  ; p[1] -> bits 23:16
457  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
458  %tmp5 = load i8, ptr %tmp4, align 1
459  %tmp6 = zext i8 %tmp5 to i32
460  %tmp7 = shl nuw nsw i32 %tmp6, 16
461  %tmp8 = or i32 %tmp7, %tmp30
462  ret i32 %tmp8
463}
464
465; ptr p; // p is 2 byte aligned
466; ((i32) p[0] << 16) | ((i32) p[1] << 24)
; Two-byte pattern placed in the upper half: p[0] in bits 23:16 and p[1] in
; bits 31:24, so the low 16 bits of the result are zero. The expected output
; below keeps two separate ldrb loads combined with lsl/orr.
467define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
468; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
469; CHECK:       // %bb.0:
470; CHECK-NEXT:    ldrb w8, [x0]
471; CHECK-NEXT:    ldrb w9, [x0, #1]
472; CHECK-NEXT:    lsl w8, w8, #16
473; CHECK-NEXT:    orr w0, w8, w9, lsl #24
474; CHECK-NEXT:    ret
  ; p[0] -> bits 23:16
475  %tmp2 = load i8, ptr %arg, align 2
476  %tmp3 = zext i8 %tmp2 to i32
477  %tmp30 = shl nuw nsw i32 %tmp3, 16
  ; p[1] -> bits 31:24
478  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
479  %tmp5 = load i8, ptr %tmp4, align 1
480  %tmp6 = zext i8 %tmp5 to i32
481  %tmp7 = shl nuw nsw i32 %tmp6, 24
482  %tmp8 = or i32 %tmp7, %tmp30
483  ret i32 %tmp8
484}
485; ptr p; // p is 2 byte aligned
486; (i32) p[1] | ((i32) p[0] << 8)
; Two-byte pattern with p[1] in bits 7:0 and p[0] in bits 15:8; the upper 16
; bits are zero. On the aarch64_be triple used by this test, the expected
; output below is a single ldrh with no byte swap.
487define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
488; CHECK-LABEL: zext_load_i32_by_i8_bswap:
489; CHECK:       // %bb.0:
490; CHECK-NEXT:    ldrh w0, [x0]
491; CHECK-NEXT:    ret
  ; p[1] -> bits 7:0
492  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
493  %tmp2 = load i8, ptr %tmp1, align 1
494  %tmp3 = zext i8 %tmp2 to i32
  ; p[0] -> bits 15:8; this load is the one marked align 2.
495  %tmp5 = load i8, ptr %arg, align 2
496  %tmp6 = zext i8 %tmp5 to i32
497  %tmp7 = shl nuw nsw i32 %tmp6, 8
498  %tmp8 = or i32 %tmp7, %tmp3
499  ret i32 %tmp8
500}
501
502; ptr p; // p is 2 byte aligned
503; ((i32) p[1] << 8) | ((i32) p[0] << 16)
; Two-byte pattern with p[1] in bits 15:8 and p[0] in bits 23:16, so bits 7:0
; of the result are zero. The expected output below keeps two separate ldrb
; loads combined with lsl/orr, i.e. the loads are not merged.
504define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
505; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
506; CHECK:       // %bb.0:
507; CHECK-NEXT:    ldrb w8, [x0, #1]
508; CHECK-NEXT:    ldrb w9, [x0]
509; CHECK-NEXT:    lsl w8, w8, #8
510; CHECK-NEXT:    orr w0, w8, w9, lsl #16
511; CHECK-NEXT:    ret
  ; p[1] -> bits 15:8
512  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
513  %tmp2 = load i8, ptr %tmp1, align 1
514  %tmp3 = zext i8 %tmp2 to i32
515  %tmp30 = shl nuw nsw i32 %tmp3, 8
  ; p[0] -> bits 23:16
516  %tmp5 = load i8, ptr %arg, align 2
517  %tmp6 = zext i8 %tmp5 to i32
518  %tmp7 = shl nuw nsw i32 %tmp6, 16
519  %tmp8 = or i32 %tmp7, %tmp30
520  ret i32 %tmp8
521}
522
523; ptr p; // p is 2 byte aligned
524; ((i32) p[1] << 16) | ((i32) p[0] << 24)
; Two-byte pattern in the upper half with p[1] in bits 23:16 and p[0] in bits
; 31:24, so the low 16 bits of the result are zero. The expected output below
; keeps two separate ldrb loads combined with lsl/orr.
525define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
526; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
527; CHECK:       // %bb.0:
528; CHECK-NEXT:    ldrb w8, [x0, #1]
529; CHECK-NEXT:    ldrb w9, [x0]
530; CHECK-NEXT:    lsl w8, w8, #16
531; CHECK-NEXT:    orr w0, w8, w9, lsl #24
532; CHECK-NEXT:    ret
  ; p[1] -> bits 23:16
533  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
534  %tmp2 = load i8, ptr %tmp1, align 1
535  %tmp3 = zext i8 %tmp2 to i32
536  %tmp30 = shl nuw nsw i32 %tmp3, 16
  ; p[0] -> bits 31:24
537  %tmp5 = load i8, ptr %arg, align 2
538  %tmp6 = zext i8 %tmp5 to i32
539  %tmp7 = shl nuw nsw i32 %tmp6, 24
540  %tmp8 = or i32 %tmp7, %tmp30
541  ret i32 %tmp8
542}
543
544; ptr p;
545; ptr p1.i16 = (ptr) p;
546; (p1.i16[0] << 8) | ((i16) p[2])
547;
548; This is essentially an i16 load from p[1], but we don't fold the pattern now
549; because in the original DAG we don't have p[1] address available
; Combines an i16 load from %p (shifted left by 8 within i16, so its upper
; byte is discarded) with an i8 load from byte offset 2. The expected output
; below keeps both loads (ldrh + ldrb) and merges them with an orr.
550define i16 @load_i16_from_nonzero_offset(ptr %p) {
551; CHECK-LABEL: load_i16_from_nonzero_offset:
552; CHECK:       // %bb.0:
553; CHECK-NEXT:    ldrh w8, [x0]
554; CHECK-NEXT:    ldrb w9, [x0, #2]
555; CHECK-NEXT:    orr w0, w9, w8, lsl #8
556; CHECK-NEXT:    ret
  ; Only the address of byte 2 is materialised; there is no GEP for byte 1.
557  %p2.i8 = getelementptr i8, ptr %p, i64 2
  ; Neither load carries an explicit alignment.
558  %v1 = load i16, ptr %p
559  %v2.i8 = load i8, ptr %p2.i8
560  %v2 = zext i8 %v2.i8 to i16
  ; The shift has no nuw/nsw flags: bits shifted out of the i16 are dropped.
561  %v1.shl = shl i16 %v1, 8
562  %res = or i16 %v1.shl, %v2
563  ret i16 %res
564}
565