; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s

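; Tests for combining chains of narrow loads, shifts and ORs into a single
; wider load, plus a byte reverse (rev/rev16) when the bytes are assembled
; in reversed order.
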
; ptr p; // p is 1 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_unaligned(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_unaligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p is 4 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_aligned(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_aligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp2 = load i8, ptr %arg, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 4
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; ptr p; // p is 8 byte aligned
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp14 = load i8, ptr %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp19 = load i8, ptr %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp24 = load i8, ptr %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp29 = load i8, ptr %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp34 = load i8, ptr %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; ptr p; // p is 8 byte aligned
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    rev x0, x8
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp20 = load i8, ptr %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp25 = load i8, ptr %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp30 = load i8, ptr %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; ptr p; // p[1] is 4 byte aligned
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w0, [x0, #1]
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[-4] is 4 byte aligned
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w0, [x0, #-4]
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp2 = load i8, ptr %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[1] is 4 byte aligned
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp15 = load i8, ptr %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[-4] is 4 byte aligned
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #-4]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp15 = load i8, ptr %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

declare i16 @llvm.bswap.i16(i16)

; ptr p; // p is 4 byte aligned
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = load i16, ptr %arg, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; ptr p; // p is 4 byte aligned
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load i16, ptr %arg, align 4
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, w1, uxtw
; CHECK-NEXT:    ldr w0, [x8, #12]
; CHECK-NEXT:    ret
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
  %tmp7 = load i8, ptr %tmp6, align 4
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
  %tmp11 = load i8, ptr %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
  %tmp17 = load i8, ptr %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
  %tmp23 = load i8, ptr %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, w1, uxtw
; CHECK-NEXT:    ldur w0, [x8, #13]
; CHECK-NEXT:    ret
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
  %tmp29 = load i8, ptr %tmp28, align 4
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
  %tmp33 = load i8, ptr %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
  %tmp39 = load i8, ptr %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
  %tmp45 = load i8, ptr %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; ptr p; // p is 2 byte aligned
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w0, [x0]
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    ldrb w9, [x0, #1]
; CHECK-NEXT:    lsl w8, w8, #8
; CHECK-NEXT:    orr w0, w8, w9, lsl #16
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    ldrb w9, [x0, #1]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    orr w0, w8, w9, lsl #24
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    rev16 w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    ldrb w9, [x0]
; CHECK-NEXT:    lsl w8, w8, #8
; CHECK-NEXT:    orr w0, w8, w9, lsl #16
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    ldrb w9, [x0]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    orr w0, w8, w9, lsl #24
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; x1 = x0
define void @short_vector_to_i32(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i1 = or i32 %s1, %z0
  %i2 = or i32 %i1, %s2
  %i3 = or i32 %i2, %s3

  store i32 %i3, ptr %out
  ret void
}

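; <4 x i8> v = load in; // in is 4 byte aligned, byte 0 of the result is unused
; ((i32) v[1] << 8) | ((i32) v[2] << 16) | ((i32) v[3] << 24)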
define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_low_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[3]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    bfi w8, w9, #8, #8
; CHECK-NEXT:    orr w8, w8, w10, lsl #24
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i2 = or i32 %s1, %s2
  %i3 = or i32 %i2, %s3

  store i32 %i3, ptr %out
  ret void
}

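; <4 x i8> v = load in; // in is 4 byte aligned, byte 3 of the result is unused
; (i32) v[0] | ((i32) v[1] << 8) | ((i32) v[2] << 16)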
define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_high_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16

  %i1 = or i32 %s1, %z0
  %i2 = or i32 %i1, %s2

  store i32 %i2, ptr %out
  ret void
}

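; <4 x i8> v = load in; // in is 4 byte aligned, bytes 0 and 1 of the result are unused
; ((i32) v[2] << 16) | ((i32) v[3] << 24)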
define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_low_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    umov w9, v0.h[2]
; CHECK-NEXT:    lsl w8, w8, #24
; CHECK-NEXT:    orr w8, w8, w9, lsl #16
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i3 = or i32 %s2, %s3

  store i32 %i3, ptr %out
  ret void
}

; x1 = x0[0:1]
define void @short_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_high_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32

  %s1 = shl nuw nsw i32 %z1, 8

  %i1 = or i32 %s1, %z0

  store i32 %i1, ptr %out
  ret void
}

; x1 = x0
define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    str x8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z0 = zext i8 %e1 to i64
  %z1 = zext i8 %e2 to i64
  %z2 = zext i8 %e3 to i64
  %z3 = zext i8 %e4 to i64

  %s1 = shl nuw nsw i64 %z1, 8
  %s2 = shl nuw nsw i64 %z2, 16
  %s3 = shl nuw i64 %z3, 24

  %i1 = or i64 %s1, %z0
  %i2 = or i64 %i1, %s2
  %i3 = or i64 %i2, %s3

  store i64 %i3, ptr %out
  ret void
}