xref: /llvm-project/llvm/test/CodeGen/X86/load-combine.ll (revision 25b9ed6e4964344e3710359bec4c831e5a8448b9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
3; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
4; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
6
7; ptr p;
8; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
9define i32 @load_i32_by_i8(ptr %arg) {
; Little-endian byte assembly (p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24):
; the four i8 loads must be combined into one plain 32-bit load on
; both i686 and x86_64 -- no bswap needed.
10; CHECK-LABEL: load_i32_by_i8:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
13; CHECK-NEXT:    movl (%eax), %eax
14; CHECK-NEXT:    retl
15;
16; CHECK64-LABEL: load_i32_by_i8:
17; CHECK64:       # %bb.0:
18; CHECK64-NEXT:    movl (%rdi), %eax
19; CHECK64-NEXT:    retq
20  %tmp1 = load i8, ptr %arg, align 1
21  %tmp2 = zext i8 %tmp1 to i32
22  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
23  %tmp4 = load i8, ptr %tmp3, align 1
24  %tmp5 = zext i8 %tmp4 to i32
25  %tmp6 = shl nuw nsw i32 %tmp5, 8
26  %tmp7 = or i32 %tmp6, %tmp2
27  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
28  %tmp9 = load i8, ptr %tmp8, align 1
29  %tmp10 = zext i8 %tmp9 to i32
30  %tmp11 = shl nuw nsw i32 %tmp10, 16
31  %tmp12 = or i32 %tmp7, %tmp11
32  %tmp13 = getelementptr inbounds i8, ptr %arg, i32 3
33  %tmp14 = load i8, ptr %tmp13, align 1
34  %tmp15 = zext i8 %tmp14 to i32
35  %tmp16 = shl nuw nsw i32 %tmp15, 24
36  %tmp17 = or i32 %tmp12, %tmp16
37  ret i32 %tmp17
38}
39
40; ptr p;
41; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
42define i32 @load_i32_by_i8_bswap(ptr %arg) {
; Big-endian byte assembly: must fold to a single 32-bit load followed
; by bswap, or to a single movbe when +movbe is available.
43; BSWAP-LABEL: load_i32_by_i8_bswap:
44; BSWAP:       # %bb.0:
45; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
46; BSWAP-NEXT:    movl (%eax), %eax
47; BSWAP-NEXT:    bswapl %eax
48; BSWAP-NEXT:    retl
49;
50; MOVBE-LABEL: load_i32_by_i8_bswap:
51; MOVBE:       # %bb.0:
52; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
53; MOVBE-NEXT:    movbel (%eax), %eax
54; MOVBE-NEXT:    retl
55;
56; BSWAP64-LABEL: load_i32_by_i8_bswap:
57; BSWAP64:       # %bb.0:
58; BSWAP64-NEXT:    movl (%rdi), %eax
59; BSWAP64-NEXT:    bswapl %eax
60; BSWAP64-NEXT:    retq
61;
62; MOVBE64-LABEL: load_i32_by_i8_bswap:
63; MOVBE64:       # %bb.0:
64; MOVBE64-NEXT:    movbel (%rdi), %eax
65; MOVBE64-NEXT:    retq
66  %tmp1 = load i8, ptr %arg, align 1
67  %tmp2 = zext i8 %tmp1 to i32
68  %tmp3 = shl nuw nsw i32 %tmp2, 24
69  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
70  %tmp5 = load i8, ptr %tmp4, align 1
71  %tmp6 = zext i8 %tmp5 to i32
72  %tmp7 = shl nuw nsw i32 %tmp6, 16
73  %tmp8 = or i32 %tmp7, %tmp3
74  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
75  %tmp10 = load i8, ptr %tmp9, align 1
76  %tmp11 = zext i8 %tmp10 to i32
77  %tmp12 = shl nuw nsw i32 %tmp11, 8
78  %tmp13 = or i32 %tmp8, %tmp12
79  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
80  %tmp15 = load i8, ptr %tmp14, align 1
81  %tmp16 = zext i8 %tmp15 to i32
82  %tmp17 = or i32 %tmp13, %tmp16
83  ret i32 %tmp17
84}
85
86; ptr p;
87; (i32) p[0] | ((i32) p[1] << 16)
88define i32 @load_i32_by_i16(ptr %arg) {
; Two adjacent little-endian i16 halves must combine into one 32-bit load.
89; CHECK-LABEL: load_i32_by_i16:
90; CHECK:       # %bb.0:
91; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
92; CHECK-NEXT:    movl (%eax), %eax
93; CHECK-NEXT:    retl
94;
95; CHECK64-LABEL: load_i32_by_i16:
96; CHECK64:       # %bb.0:
97; CHECK64-NEXT:    movl (%rdi), %eax
98; CHECK64-NEXT:    retq
99  %tmp1 = load i16, ptr %arg, align 1
100  %tmp2 = zext i16 %tmp1 to i32
101  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
102  %tmp4 = load i16, ptr %tmp3, align 1
103  %tmp5 = zext i16 %tmp4 to i32
104  %tmp6 = shl nuw nsw i32 %tmp5, 16
105  %tmp7 = or i32 %tmp6, %tmp2
106  ret i32 %tmp7
107}
108
109; ptr p_16;
110; ptr p_8 = (ptr) p_16;
111; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
112define i32 @load_i32_by_i16_i8(ptr %arg) {
; Mixed widths: one i16 low half plus two i8 bytes for the high half,
; all little-endian -- must still fold to a single 32-bit load.
113; CHECK-LABEL: load_i32_by_i16_i8:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
116; CHECK-NEXT:    movl (%eax), %eax
117; CHECK-NEXT:    retl
118;
119; CHECK64-LABEL: load_i32_by_i16_i8:
120; CHECK64:       # %bb.0:
121; CHECK64-NEXT:    movl (%rdi), %eax
122; CHECK64-NEXT:    retq
123  %tmp2 = load i16, ptr %arg, align 1
124  %tmp3 = zext i16 %tmp2 to i32
125  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
126  %tmp5 = load i8, ptr %tmp4, align 1
127  %tmp6 = zext i8 %tmp5 to i32
128  %tmp7 = shl nuw nsw i32 %tmp6, 16
129  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 3
130  %tmp9 = load i8, ptr %tmp8, align 1
131  %tmp10 = zext i8 %tmp9 to i32
132  %tmp11 = shl nuw nsw i32 %tmp10, 24
133  %tmp12 = or i32 %tmp7, %tmp11
134  %tmp13 = or i32 %tmp12, %tmp3
135  ret i32 %tmp13
136}
137
138
139; ptr p;
140; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
141define i32 @load_i32_by_i16_by_i8(ptr %arg) {
; Two little-endian i16 values, each assembled from two i8 loads,
; then combined little-endian into an i32 -- the nested pattern must
; still collapse to one plain 32-bit load.
142; CHECK-LABEL: load_i32_by_i16_by_i8:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
145; CHECK-NEXT:    movl (%eax), %eax
146; CHECK-NEXT:    retl
147;
148; CHECK64-LABEL: load_i32_by_i16_by_i8:
149; CHECK64:       # %bb.0:
150; CHECK64-NEXT:    movl (%rdi), %eax
151; CHECK64-NEXT:    retq
152  %tmp1 = load i8, ptr %arg, align 1
153  %tmp2 = zext i8 %tmp1 to i16
154  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
155  %tmp4 = load i8, ptr %tmp3, align 1
156  %tmp5 = zext i8 %tmp4 to i16
157  %tmp6 = shl nuw nsw i16 %tmp5, 8
158  %tmp7 = or i16 %tmp6, %tmp2
159  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
160  %tmp9 = load i8, ptr %tmp8, align 1
161  %tmp10 = zext i8 %tmp9 to i16
162  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
163  %tmp12 = load i8, ptr %tmp11, align 1
164  %tmp13 = zext i8 %tmp12 to i16
165  %tmp14 = shl nuw nsw i16 %tmp13, 8
166  %tmp15 = or i16 %tmp14, %tmp10
167  %tmp16 = zext i16 %tmp7 to i32
168  %tmp17 = zext i16 %tmp15 to i32
169  %tmp18 = shl nuw nsw i32 %tmp17, 16
170  %tmp19 = or i32 %tmp18, %tmp16
171  ret i32 %tmp19
172}
173
174; ptr p;
175; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
176define i32 @load_i32_by_i16_by_i8_bswap(ptr %arg) {
; Big-endian variant of the nested i16-of-i8 pattern: must fold to a
; single 32-bit load plus bswap (or one movbe with +movbe).
177; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
178; BSWAP:       # %bb.0:
179; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
180; BSWAP-NEXT:    movl (%eax), %eax
181; BSWAP-NEXT:    bswapl %eax
182; BSWAP-NEXT:    retl
183;
184; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
185; MOVBE:       # %bb.0:
186; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
187; MOVBE-NEXT:    movbel (%eax), %eax
188; MOVBE-NEXT:    retl
189;
190; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
191; BSWAP64:       # %bb.0:
192; BSWAP64-NEXT:    movl (%rdi), %eax
193; BSWAP64-NEXT:    bswapl %eax
194; BSWAP64-NEXT:    retq
195;
196; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
197; MOVBE64:       # %bb.0:
198; MOVBE64-NEXT:    movbel (%rdi), %eax
199; MOVBE64-NEXT:    retq
200  %tmp1 = load i8, ptr %arg, align 1
201  %tmp2 = zext i8 %tmp1 to i16
202  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
203  %tmp4 = load i8, ptr %tmp3, align 1
204  %tmp5 = zext i8 %tmp4 to i16
205  %tmp6 = shl nuw nsw i16 %tmp2, 8
206  %tmp7 = or i16 %tmp6, %tmp5
207  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
208  %tmp9 = load i8, ptr %tmp8, align 1
209  %tmp10 = zext i8 %tmp9 to i16
210  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
211  %tmp12 = load i8, ptr %tmp11, align 1
212  %tmp13 = zext i8 %tmp12 to i16
213  %tmp14 = shl nuw nsw i16 %tmp10, 8
214  %tmp15 = or i16 %tmp14, %tmp13
215  %tmp16 = zext i16 %tmp7 to i32
216  %tmp17 = zext i16 %tmp15 to i32
217  %tmp18 = shl nuw nsw i32 %tmp16, 16
218  %tmp19 = or i32 %tmp18, %tmp17
219  ret i32 %tmp19
220}
221
222; ptr p;
223; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
224define i64 @load_i64_by_i8(ptr %arg) {
; Eight little-endian bytes assembled into an i64: one movq on x86_64,
; a pair of 32-bit loads (low in eax, high in edx) on i686.
225; CHECK-LABEL: load_i64_by_i8:
226; CHECK:       # %bb.0:
227; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
228; CHECK-NEXT:    movl (%ecx), %eax
229; CHECK-NEXT:    movl 4(%ecx), %edx
230; CHECK-NEXT:    retl
231;
232; CHECK64-LABEL: load_i64_by_i8:
233; CHECK64:       # %bb.0:
234; CHECK64-NEXT:    movq (%rdi), %rax
235; CHECK64-NEXT:    retq
236  %tmp1 = load i8, ptr %arg, align 1
237  %tmp2 = zext i8 %tmp1 to i64
238  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
239  %tmp4 = load i8, ptr %tmp3, align 1
240  %tmp5 = zext i8 %tmp4 to i64
241  %tmp6 = shl nuw nsw i64 %tmp5, 8
242  %tmp7 = or i64 %tmp6, %tmp2
243  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
244  %tmp9 = load i8, ptr %tmp8, align 1
245  %tmp10 = zext i8 %tmp9 to i64
246  %tmp11 = shl nuw nsw i64 %tmp10, 16
247  %tmp12 = or i64 %tmp7, %tmp11
248  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
249  %tmp14 = load i8, ptr %tmp13, align 1
250  %tmp15 = zext i8 %tmp14 to i64
251  %tmp16 = shl nuw nsw i64 %tmp15, 24
252  %tmp17 = or i64 %tmp12, %tmp16
253  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
254  %tmp19 = load i8, ptr %tmp18, align 1
255  %tmp20 = zext i8 %tmp19 to i64
256  %tmp21 = shl nuw nsw i64 %tmp20, 32
257  %tmp22 = or i64 %tmp17, %tmp21
258  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
259  %tmp24 = load i8, ptr %tmp23, align 1
260  %tmp25 = zext i8 %tmp24 to i64
261  %tmp26 = shl nuw nsw i64 %tmp25, 40
262  %tmp27 = or i64 %tmp22, %tmp26
263  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
264  %tmp29 = load i8, ptr %tmp28, align 1
265  %tmp30 = zext i8 %tmp29 to i64
266  %tmp31 = shl nuw nsw i64 %tmp30, 48
267  %tmp32 = or i64 %tmp27, %tmp31
268  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
269  %tmp34 = load i8, ptr %tmp33, align 1
270  %tmp35 = zext i8 %tmp34 to i64
271  %tmp36 = shl nuw i64 %tmp35, 56
272  %tmp37 = or i64 %tmp32, %tmp36
273  ret i64 %tmp37
274}
275
276; ptr p;
277; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
278define i64 @load_i64_by_i8_bswap(ptr %arg) {
; Eight big-endian bytes into an i64: movq+bswapq (or movbeq) on
; x86_64; on i686 the halves are loaded and byte-swapped individually,
; with the high source word landing in edx.
279; BSWAP-LABEL: load_i64_by_i8_bswap:
280; BSWAP:       # %bb.0:
281; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
282; BSWAP-NEXT:    movl (%eax), %edx
283; BSWAP-NEXT:    movl 4(%eax), %eax
284; BSWAP-NEXT:    bswapl %eax
285; BSWAP-NEXT:    bswapl %edx
286; BSWAP-NEXT:    retl
287;
288; MOVBE-LABEL: load_i64_by_i8_bswap:
289; MOVBE:       # %bb.0:
290; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
291; MOVBE-NEXT:    movbel 4(%ecx), %eax
292; MOVBE-NEXT:    movbel (%ecx), %edx
293; MOVBE-NEXT:    retl
294;
295; BSWAP64-LABEL: load_i64_by_i8_bswap:
296; BSWAP64:       # %bb.0:
297; BSWAP64-NEXT:    movq (%rdi), %rax
298; BSWAP64-NEXT:    bswapq %rax
299; BSWAP64-NEXT:    retq
300;
301; MOVBE64-LABEL: load_i64_by_i8_bswap:
302; MOVBE64:       # %bb.0:
303; MOVBE64-NEXT:    movbeq (%rdi), %rax
304; MOVBE64-NEXT:    retq
305  %tmp1 = load i8, ptr %arg, align 1
306  %tmp2 = zext i8 %tmp1 to i64
307  %tmp3 = shl nuw i64 %tmp2, 56
308  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
309  %tmp5 = load i8, ptr %tmp4, align 1
310  %tmp6 = zext i8 %tmp5 to i64
311  %tmp7 = shl nuw nsw i64 %tmp6, 48
312  %tmp8 = or i64 %tmp7, %tmp3
313  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
314  %tmp10 = load i8, ptr %tmp9, align 1
315  %tmp11 = zext i8 %tmp10 to i64
316  %tmp12 = shl nuw nsw i64 %tmp11, 40
317  %tmp13 = or i64 %tmp8, %tmp12
318  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
319  %tmp15 = load i8, ptr %tmp14, align 1
320  %tmp16 = zext i8 %tmp15 to i64
321  %tmp17 = shl nuw nsw i64 %tmp16, 32
322  %tmp18 = or i64 %tmp13, %tmp17
323  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
324  %tmp20 = load i8, ptr %tmp19, align 1
325  %tmp21 = zext i8 %tmp20 to i64
326  %tmp22 = shl nuw nsw i64 %tmp21, 24
327  %tmp23 = or i64 %tmp18, %tmp22
328  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
329  %tmp25 = load i8, ptr %tmp24, align 1
330  %tmp26 = zext i8 %tmp25 to i64
331  %tmp27 = shl nuw nsw i64 %tmp26, 16
332  %tmp28 = or i64 %tmp23, %tmp27
333  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
334  %tmp30 = load i8, ptr %tmp29, align 1
335  %tmp31 = zext i8 %tmp30 to i64
336  %tmp32 = shl nuw nsw i64 %tmp31, 8
337  %tmp33 = or i64 %tmp28, %tmp32
338  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
339  %tmp35 = load i8, ptr %tmp34, align 1
340  %tmp36 = zext i8 %tmp35 to i64
341  %tmp37 = or i64 %tmp33, %tmp36
342  ret i64 %tmp37
343}
344
345; Part of the load by bytes pattern is used outside of the pattern
346; ptr p;
347; i32 x = (i32) p[1]
348; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
349; x | res
350define i32 @load_i32_by_i8_bswap_uses(ptr %arg) {
; Negative test: %tmp6 (byte 1) has an extra use outside the byte-swap
; pattern, so the combine must NOT fire -- the checks pin the
; unfolded byte-by-byte code.
351; CHECK-LABEL: load_i32_by_i8_bswap_uses:
352; CHECK:       # %bb.0:
353; CHECK-NEXT:    pushl %esi
354; CHECK-NEXT:    .cfi_def_cfa_offset 8
355; CHECK-NEXT:    .cfi_offset %esi, -8
356; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
357; CHECK-NEXT:    movzbl (%eax), %ecx
358; CHECK-NEXT:    shll $24, %ecx
359; CHECK-NEXT:    movzbl 1(%eax), %edx
360; CHECK-NEXT:    movl %edx, %esi
361; CHECK-NEXT:    shll $16, %esi
362; CHECK-NEXT:    orl %ecx, %esi
363; CHECK-NEXT:    movzbl 2(%eax), %ecx
364; CHECK-NEXT:    shll $8, %ecx
365; CHECK-NEXT:    orl %esi, %ecx
366; CHECK-NEXT:    movzbl 3(%eax), %eax
367; CHECK-NEXT:    orl %ecx, %eax
368; CHECK-NEXT:    orl %edx, %eax
369; CHECK-NEXT:    popl %esi
370; CHECK-NEXT:    .cfi_def_cfa_offset 4
371; CHECK-NEXT:    retl
372;
373; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
374; CHECK64:       # %bb.0:
375; CHECK64-NEXT:    movzbl (%rdi), %eax
376; CHECK64-NEXT:    shll $24, %eax
377; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
378; CHECK64-NEXT:    movl %ecx, %edx
379; CHECK64-NEXT:    shll $16, %edx
380; CHECK64-NEXT:    orl %eax, %edx
381; CHECK64-NEXT:    movzbl 2(%rdi), %esi
382; CHECK64-NEXT:    shll $8, %esi
383; CHECK64-NEXT:    orl %edx, %esi
384; CHECK64-NEXT:    movzbl 3(%rdi), %eax
385; CHECK64-NEXT:    orl %esi, %eax
386; CHECK64-NEXT:    orl %ecx, %eax
387; CHECK64-NEXT:    retq
388  %tmp1 = load i8, ptr %arg, align 1
389  %tmp2 = zext i8 %tmp1 to i32
390  %tmp3 = shl nuw nsw i32 %tmp2, 24
391  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
392  %tmp5 = load i8, ptr %tmp4, align 1
393  %tmp6 = zext i8 %tmp5 to i32
394  %tmp7 = shl nuw nsw i32 %tmp6, 16
395  %tmp8 = or i32 %tmp7, %tmp3
396  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
397  %tmp10 = load i8, ptr %tmp9, align 1
398  %tmp11 = zext i8 %tmp10 to i32
399  %tmp12 = shl nuw nsw i32 %tmp11, 8
400  %tmp13 = or i32 %tmp8, %tmp12
401  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
402  %tmp15 = load i8, ptr %tmp14, align 1
403  %tmp16 = zext i8 %tmp15 to i32
404  %tmp17 = or i32 %tmp13, %tmp16
405  ; Use individual part of the pattern outside of the pattern
406  %tmp18 = or i32 %tmp6, %tmp17
407  ret i32 %tmp18
408}
409
410; One of the loads is volatile
411; ptr p;
412; p0 = volatile *p;
413; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
414define i32 @load_i32_by_i8_bswap_volatile(ptr %arg) {
; Negative test: the first byte is loaded volatile, so merging the
; four loads into one would be illegal -- checks pin the unfolded code.
415; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
416; CHECK:       # %bb.0:
417; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
418; CHECK-NEXT:    movzbl (%eax), %ecx
419; CHECK-NEXT:    shll $24, %ecx
420; CHECK-NEXT:    movzbl 1(%eax), %edx
421; CHECK-NEXT:    shll $16, %edx
422; CHECK-NEXT:    orl %ecx, %edx
423; CHECK-NEXT:    movzbl 2(%eax), %ecx
424; CHECK-NEXT:    shll $8, %ecx
425; CHECK-NEXT:    orl %edx, %ecx
426; CHECK-NEXT:    movzbl 3(%eax), %eax
427; CHECK-NEXT:    orl %ecx, %eax
428; CHECK-NEXT:    retl
429;
430; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
431; CHECK64:       # %bb.0:
432; CHECK64-NEXT:    movzbl (%rdi), %eax
433; CHECK64-NEXT:    shll $24, %eax
434; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
435; CHECK64-NEXT:    shll $16, %ecx
436; CHECK64-NEXT:    orl %eax, %ecx
437; CHECK64-NEXT:    movzbl 2(%rdi), %edx
438; CHECK64-NEXT:    shll $8, %edx
439; CHECK64-NEXT:    orl %ecx, %edx
440; CHECK64-NEXT:    movzbl 3(%rdi), %eax
441; CHECK64-NEXT:    orl %edx, %eax
442; CHECK64-NEXT:    retq
443  %tmp1 = load volatile i8, ptr %arg, align 1
444  %tmp2 = zext i8 %tmp1 to i32
445  %tmp3 = shl nuw nsw i32 %tmp2, 24
446  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
447  %tmp5 = load i8, ptr %tmp4, align 1
448  %tmp6 = zext i8 %tmp5 to i32
449  %tmp7 = shl nuw nsw i32 %tmp6, 16
450  %tmp8 = or i32 %tmp7, %tmp3
451  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
452  %tmp10 = load i8, ptr %tmp9, align 1
453  %tmp11 = zext i8 %tmp10 to i32
454  %tmp12 = shl nuw nsw i32 %tmp11, 8
455  %tmp13 = or i32 %tmp8, %tmp12
456  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
457  %tmp15 = load i8, ptr %tmp14, align 1
458  %tmp16 = zext i8 %tmp15 to i32
459  %tmp17 = or i32 %tmp13, %tmp16
460  ret i32 %tmp17
461}
462
463; There is a store in between individual loads
464; ptr p, q;
465; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
466; *q = 0;
467; res2 = ((i32) p[2] << 8) | (i32) p[3]
468; res1 | res2
469define i32 @load_i32_by_i8_bswap_store_in_between(ptr %arg, ptr %arg1) {
; Negative test: a store to a possibly-aliasing pointer sits between
; the byte loads, so the loads may not be merged across it.
470; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
471; CHECK:       # %bb.0:
472; CHECK-NEXT:    pushl %esi
473; CHECK-NEXT:    .cfi_def_cfa_offset 8
474; CHECK-NEXT:    .cfi_offset %esi, -8
475; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
476; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
477; CHECK-NEXT:    movzbl (%eax), %edx
478; CHECK-NEXT:    shll $24, %edx
479; CHECK-NEXT:    movzbl 1(%eax), %esi
480; CHECK-NEXT:    movl $0, (%ecx)
481; CHECK-NEXT:    shll $16, %esi
482; CHECK-NEXT:    orl %edx, %esi
483; CHECK-NEXT:    movzbl 2(%eax), %ecx
484; CHECK-NEXT:    shll $8, %ecx
485; CHECK-NEXT:    orl %esi, %ecx
486; CHECK-NEXT:    movzbl 3(%eax), %eax
487; CHECK-NEXT:    orl %ecx, %eax
488; CHECK-NEXT:    popl %esi
489; CHECK-NEXT:    .cfi_def_cfa_offset 4
490; CHECK-NEXT:    retl
491;
492; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
493; CHECK64:       # %bb.0:
494; CHECK64-NEXT:    movzbl (%rdi), %eax
495; CHECK64-NEXT:    shll $24, %eax
496; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
497; CHECK64-NEXT:    movl $0, (%rsi)
498; CHECK64-NEXT:    shll $16, %ecx
499; CHECK64-NEXT:    orl %eax, %ecx
500; CHECK64-NEXT:    movzbl 2(%rdi), %edx
501; CHECK64-NEXT:    shll $8, %edx
502; CHECK64-NEXT:    orl %ecx, %edx
503; CHECK64-NEXT:    movzbl 3(%rdi), %eax
504; CHECK64-NEXT:    orl %edx, %eax
505; CHECK64-NEXT:    retq
506  %tmp2 = load i8, ptr %arg, align 1
507  %tmp3 = zext i8 %tmp2 to i32
508  %tmp4 = shl nuw nsw i32 %tmp3, 24
509  %tmp5 = getelementptr inbounds i8, ptr %arg, i32 1
510  %tmp6 = load i8, ptr %tmp5, align 1
511  ; This store will prevent folding of the pattern
512  store i32 0, ptr %arg1
513  %tmp7 = zext i8 %tmp6 to i32
514  %tmp8 = shl nuw nsw i32 %tmp7, 16
515  %tmp9 = or i32 %tmp8, %tmp4
516  %tmp10 = getelementptr inbounds i8, ptr %arg, i32 2
517  %tmp11 = load i8, ptr %tmp10, align 1
518  %tmp12 = zext i8 %tmp11 to i32
519  %tmp13 = shl nuw nsw i32 %tmp12, 8
520  %tmp14 = or i32 %tmp9, %tmp13
521  %tmp15 = getelementptr inbounds i8, ptr %arg, i32 3
522  %tmp16 = load i8, ptr %tmp15, align 1
523  %tmp17 = zext i8 %tmp16 to i32
524  %tmp18 = or i32 %tmp14, %tmp17
525  ret i32 %tmp18
526}
527
528; One of the loads is from an unrelated location
529; ptr p, q;
530; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
531define i32 @load_i32_by_i8_bswap_unrelated_load(ptr %arg, ptr %arg1) {
; Negative test: byte 1 comes from a different base pointer (%arg1),
; so the bytes are not contiguous and must not be merged.
532; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
533; CHECK:       # %bb.0:
534; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
535; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
536; CHECK-NEXT:    movzbl (%ecx), %edx
537; CHECK-NEXT:    shll $24, %edx
538; CHECK-NEXT:    movzbl 1(%eax), %eax
539; CHECK-NEXT:    shll $16, %eax
540; CHECK-NEXT:    orl %edx, %eax
541; CHECK-NEXT:    movzbl 2(%ecx), %edx
542; CHECK-NEXT:    shll $8, %edx
543; CHECK-NEXT:    orl %eax, %edx
544; CHECK-NEXT:    movzbl 3(%ecx), %eax
545; CHECK-NEXT:    orl %edx, %eax
546; CHECK-NEXT:    retl
547;
548; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
549; CHECK64:       # %bb.0:
550; CHECK64-NEXT:    movzbl (%rdi), %eax
551; CHECK64-NEXT:    shll $24, %eax
552; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
553; CHECK64-NEXT:    shll $16, %ecx
554; CHECK64-NEXT:    orl %eax, %ecx
555; CHECK64-NEXT:    movzbl 2(%rdi), %edx
556; CHECK64-NEXT:    shll $8, %edx
557; CHECK64-NEXT:    orl %ecx, %edx
558; CHECK64-NEXT:    movzbl 3(%rdi), %eax
559; CHECK64-NEXT:    orl %edx, %eax
560; CHECK64-NEXT:    retq
561  %tmp3 = load i8, ptr %arg, align 1
562  %tmp4 = zext i8 %tmp3 to i32
563  %tmp5 = shl nuw nsw i32 %tmp4, 24
564  ; Load from an unrelated address
565  %tmp6 = getelementptr inbounds i8, ptr %arg1, i32 1
566  %tmp7 = load i8, ptr %tmp6, align 1
567  %tmp8 = zext i8 %tmp7 to i32
568  %tmp9 = shl nuw nsw i32 %tmp8, 16
569  %tmp10 = or i32 %tmp9, %tmp5
570  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 2
571  %tmp12 = load i8, ptr %tmp11, align 1
572  %tmp13 = zext i8 %tmp12 to i32
573  %tmp14 = shl nuw nsw i32 %tmp13, 8
574  %tmp15 = or i32 %tmp10, %tmp14
575  %tmp16 = getelementptr inbounds i8, ptr %arg, i32 3
576  %tmp17 = load i8, ptr %tmp16, align 1
577  %tmp18 = zext i8 %tmp17 to i32
578  %tmp19 = or i32 %tmp15, %tmp18
579  ret i32 %tmp19
580}
581
582; ptr p;
583; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
584define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; Little-endian bytes starting at offset 1: must fold to a single
; 32-bit load with displacement 1.
585; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
586; CHECK:       # %bb.0:
587; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
588; CHECK-NEXT:    movl 1(%eax), %eax
589; CHECK-NEXT:    retl
590;
591; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
592; CHECK64:       # %bb.0:
593; CHECK64-NEXT:    movl 1(%rdi), %eax
594; CHECK64-NEXT:    retq
595  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
596  %tmp2 = load i8, ptr %tmp1, align 1
597  %tmp3 = zext i8 %tmp2 to i32
598  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
599  %tmp5 = load i8, ptr %tmp4, align 1
600  %tmp6 = zext i8 %tmp5 to i32
601  %tmp7 = shl nuw nsw i32 %tmp6, 8
602  %tmp8 = or i32 %tmp7, %tmp3
603  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
604  %tmp10 = load i8, ptr %tmp9, align 1
605  %tmp11 = zext i8 %tmp10 to i32
606  %tmp12 = shl nuw nsw i32 %tmp11, 16
607  %tmp13 = or i32 %tmp8, %tmp12
608  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
609  %tmp15 = load i8, ptr %tmp14, align 1
610  %tmp16 = zext i8 %tmp15 to i32
611  %tmp17 = shl nuw nsw i32 %tmp16, 24
612  %tmp18 = or i32 %tmp13, %tmp17
613  ret i32 %tmp18
614}
615
616; ptr p;
617; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
618define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; Little-endian bytes at offsets -4..-1: must fold to a single 32-bit
; load with displacement -4.
619; CHECK-LABEL: load_i32_by_i8_neg_offset:
620; CHECK:       # %bb.0:
621; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
622; CHECK-NEXT:    movl -4(%eax), %eax
623; CHECK-NEXT:    retl
624;
625; CHECK64-LABEL: load_i32_by_i8_neg_offset:
626; CHECK64:       # %bb.0:
627; CHECK64-NEXT:    movl -4(%rdi), %eax
628; CHECK64-NEXT:    retq
629  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
630  %tmp2 = load i8, ptr %tmp1, align 1
631  %tmp3 = zext i8 %tmp2 to i32
632  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
633  %tmp5 = load i8, ptr %tmp4, align 1
634  %tmp6 = zext i8 %tmp5 to i32
635  %tmp7 = shl nuw nsw i32 %tmp6, 8
636  %tmp8 = or i32 %tmp7, %tmp3
637  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
638  %tmp10 = load i8, ptr %tmp9, align 1
639  %tmp11 = zext i8 %tmp10 to i32
640  %tmp12 = shl nuw nsw i32 %tmp11, 16
641  %tmp13 = or i32 %tmp8, %tmp12
642  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
643  %tmp15 = load i8, ptr %tmp14, align 1
644  %tmp16 = zext i8 %tmp15 to i32
645  %tmp17 = shl nuw nsw i32 %tmp16, 24
646  %tmp18 = or i32 %tmp13, %tmp17
647  ret i32 %tmp18
648}
649
650; ptr p;
651; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
652define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; Big-endian bytes at offsets 1..4: must fold to one 32-bit load at
; displacement 1 plus bswap (or movbe with +movbe).
653; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
654; BSWAP:       # %bb.0:
655; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
656; BSWAP-NEXT:    movl 1(%eax), %eax
657; BSWAP-NEXT:    bswapl %eax
658; BSWAP-NEXT:    retl
659;
660; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
661; MOVBE:       # %bb.0:
662; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
663; MOVBE-NEXT:    movbel 1(%eax), %eax
664; MOVBE-NEXT:    retl
665;
666; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
667; BSWAP64:       # %bb.0:
668; BSWAP64-NEXT:    movl 1(%rdi), %eax
669; BSWAP64-NEXT:    bswapl %eax
670; BSWAP64-NEXT:    retq
671;
672; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
673; MOVBE64:       # %bb.0:
674; MOVBE64-NEXT:    movbel 1(%rdi), %eax
675; MOVBE64-NEXT:    retq
676  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
677  %tmp2 = load i8, ptr %tmp1, align 1
678  %tmp3 = zext i8 %tmp2 to i32
679  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
680  %tmp5 = load i8, ptr %tmp4, align 1
681  %tmp6 = zext i8 %tmp5 to i32
682  %tmp7 = shl nuw nsw i32 %tmp6, 8
683  %tmp8 = or i32 %tmp7, %tmp3
684  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
685  %tmp10 = load i8, ptr %tmp9, align 1
686  %tmp11 = zext i8 %tmp10 to i32
687  %tmp12 = shl nuw nsw i32 %tmp11, 16
688  %tmp13 = or i32 %tmp8, %tmp12
689  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
690  %tmp15 = load i8, ptr %tmp14, align 1
691  %tmp16 = zext i8 %tmp15 to i32
692  %tmp17 = shl nuw nsw i32 %tmp16, 24
693  %tmp18 = or i32 %tmp13, %tmp17
694  ret i32 %tmp18
695}
696
697; ptr p;
698; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
699define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; Big-endian bytes at offsets -1..-4: must fold to one 32-bit load at
; displacement -4 plus bswap (or movbe with +movbe).
700; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
701; BSWAP:       # %bb.0:
702; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
703; BSWAP-NEXT:    movl -4(%eax), %eax
704; BSWAP-NEXT:    bswapl %eax
705; BSWAP-NEXT:    retl
706;
707; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
708; MOVBE:       # %bb.0:
709; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
710; MOVBE-NEXT:    movbel -4(%eax), %eax
711; MOVBE-NEXT:    retl
712;
713; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
714; BSWAP64:       # %bb.0:
715; BSWAP64-NEXT:    movl -4(%rdi), %eax
716; BSWAP64-NEXT:    bswapl %eax
717; BSWAP64-NEXT:    retq
718;
719; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
720; MOVBE64:       # %bb.0:
721; MOVBE64-NEXT:    movbel -4(%rdi), %eax
722; MOVBE64-NEXT:    retq
723  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
724  %tmp2 = load i8, ptr %tmp1, align 1
725  %tmp3 = zext i8 %tmp2 to i32
726  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
727  %tmp5 = load i8, ptr %tmp4, align 1
728  %tmp6 = zext i8 %tmp5 to i32
729  %tmp7 = shl nuw nsw i32 %tmp6, 8
730  %tmp8 = or i32 %tmp7, %tmp3
731  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
732  %tmp10 = load i8, ptr %tmp9, align 1
733  %tmp11 = zext i8 %tmp10 to i32
734  %tmp12 = shl nuw nsw i32 %tmp11, 16
735  %tmp13 = or i32 %tmp8, %tmp12
736  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
737  %tmp15 = load i8, ptr %tmp14, align 1
738  %tmp16 = zext i8 %tmp15 to i32
739  %tmp17 = shl nuw nsw i32 %tmp16, 24
740  %tmp18 = or i32 %tmp13, %tmp17
741  ret i32 %tmp18
742}
743
744; ptr p; i32 i;
745; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
746define i32 @load_i32_by_i8_bswap_base_index_offset(ptr %arg, i32 %arg1) {
; Big-endian bytes addressed as base + variable index (+0..+3): the
; combiner must recognize the common (base,index) pair and emit one
; indexed 32-bit load plus bswap/movbe. On x86_64 the i32 index is
; sign-extended first (movslq).
747; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
748; BSWAP:       # %bb.0:
749; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
750; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
751; BSWAP-NEXT:    movl (%ecx,%eax), %eax
752; BSWAP-NEXT:    bswapl %eax
753; BSWAP-NEXT:    retl
754;
755; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
756; MOVBE:       # %bb.0:
757; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
758; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
759; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
760; MOVBE-NEXT:    retl
761;
762; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
763; BSWAP64:       # %bb.0:
764; BSWAP64-NEXT:    movslq %esi, %rax
765; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
766; BSWAP64-NEXT:    bswapl %eax
767; BSWAP64-NEXT:    retq
768;
769; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
770; MOVBE64:       # %bb.0:
771; MOVBE64-NEXT:    movslq %esi, %rax
772; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
773; MOVBE64-NEXT:    retq
774  %tmp2 = getelementptr inbounds i8, ptr %arg, i32 %arg1
775  %tmp3 = load i8, ptr %tmp2, align 1
776  %tmp4 = zext i8 %tmp3 to i32
777  %tmp5 = shl nuw nsw i32 %tmp4, 24
778  %tmp6 = add nuw nsw i32 %arg1, 1
779  %tmp7 = getelementptr inbounds i8, ptr %arg, i32 %tmp6
780  %tmp8 = load i8, ptr %tmp7, align 1
781  %tmp9 = zext i8 %tmp8 to i32
782  %tmp10 = shl nuw nsw i32 %tmp9, 16
783  %tmp11 = or i32 %tmp10, %tmp5
784  %tmp12 = add nuw nsw i32 %arg1, 2
785  %tmp13 = getelementptr inbounds i8, ptr %arg, i32 %tmp12
786  %tmp14 = load i8, ptr %tmp13, align 1
787  %tmp15 = zext i8 %tmp14 to i32
788  %tmp16 = shl nuw nsw i32 %tmp15, 8
789  %tmp17 = or i32 %tmp11, %tmp16
790  %tmp18 = add nuw nsw i32 %arg1, 3
791  %tmp19 = getelementptr inbounds i8, ptr %arg, i32 %tmp18
792  %tmp20 = load i8, ptr %tmp19, align 1
793  %tmp21 = zext i8 %tmp20 to i32
794  %tmp22 = or i32 %tmp17, %tmp21
795  ret i32 %tmp22
796}
797
798; Verify that we don't crash handling shl i32 %conv57, 32
799define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
; Crash regression test: `shl i32 %x, 32` and `shl i32 %x, 56` are
; out-of-range i32 shifts (poison per LangRef); the combiner must not
; assert on them. The whole expression folds to the constant -1 stored
; through %dst, so %src1/%src2 are never dereferenced in the output.
800; CHECK-LABEL: shift_i32_by_32:
801; CHECK:       # %bb.0: # %entry
802; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
803; CHECK-NEXT:    movl $-1, 4(%eax)
804; CHECK-NEXT:    movl $-1, (%eax)
805; CHECK-NEXT:    retl
806;
807; CHECK64-LABEL: shift_i32_by_32:
808; CHECK64:       # %bb.0: # %entry
809; CHECK64-NEXT:    movq $-1, (%rdx)
810; CHECK64-NEXT:    retq
811entry:
812  %load1 = load i8, ptr %src1, align 1
813  %conv46 = zext i8 %load1 to i32
814  %shl47 = shl i32 %conv46, 56
815  %or55 = or i32 %shl47, 0
816  %load2 = load i8, ptr %src2, align 1
817  %conv57 = zext i8 %load2 to i32
818  %shl58 = shl i32 %conv57, 32
819  %or59 = or i32 %or55, %shl58
820  %or74 = or i32 %or59, 0
821  %conv75 = sext i32 %or74 to i64
822  store i64 %conv75, ptr %dst, align 8
823  ret void
824}
825
826declare i16 @llvm.bswap.i16(i16)
827
828; ptr p;
829; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
830define i32 @load_i32_by_bswap_i16(ptr %arg) {
; Two i16 loads each passed through llvm.bswap.i16, combined with the
; first half in the high bits: equivalent to a big-endian 32-bit load,
; so it must fold to load+bswapl (or a single movbel).
831; BSWAP-LABEL: load_i32_by_bswap_i16:
832; BSWAP:       # %bb.0:
833; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
834; BSWAP-NEXT:    movl (%eax), %eax
835; BSWAP-NEXT:    bswapl %eax
836; BSWAP-NEXT:    retl
837;
838; MOVBE-LABEL: load_i32_by_bswap_i16:
839; MOVBE:       # %bb.0:
840; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
841; MOVBE-NEXT:    movbel (%eax), %eax
842; MOVBE-NEXT:    retl
843;
844; BSWAP64-LABEL: load_i32_by_bswap_i16:
845; BSWAP64:       # %bb.0:
846; BSWAP64-NEXT:    movl (%rdi), %eax
847; BSWAP64-NEXT:    bswapl %eax
848; BSWAP64-NEXT:    retq
849;
850; MOVBE64-LABEL: load_i32_by_bswap_i16:
851; MOVBE64:       # %bb.0:
852; MOVBE64-NEXT:    movbel (%rdi), %eax
853; MOVBE64-NEXT:    retq
854  %tmp1 = load i16, ptr %arg, align 4
855  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
856  %tmp2 = zext i16 %tmp11 to i32
857  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
858  %tmp4 = load i16, ptr %tmp3, align 1
859  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
860  %tmp5 = zext i16 %tmp41 to i32
861  %tmp6 = shl nuw nsw i32 %tmp2, 16
862  %tmp7 = or i32 %tmp6, %tmp5
863  ret i32 %tmp7
864}
865
866; ptr p;
867; (i32) p[0] | ((sext(p[1]) to i32) << 16)
868define i32 @load_i32_by_sext_i16(ptr %arg) {
; The high half is sign-extended rather than zero-extended, but the
; shl by 16 discards the replicated sign bits, so the combine must
; still fire and emit a single 32-bit load.
869; CHECK-LABEL: load_i32_by_sext_i16:
870; CHECK:       # %bb.0:
871; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
872; CHECK-NEXT:    movl (%eax), %eax
873; CHECK-NEXT:    retl
874;
875; CHECK64-LABEL: load_i32_by_sext_i16:
876; CHECK64:       # %bb.0:
877; CHECK64-NEXT:    movl (%rdi), %eax
878; CHECK64-NEXT:    retq
879  %tmp1 = load i16, ptr %arg, align 1
880  %tmp2 = zext i16 %tmp1 to i32
881  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
882  %tmp4 = load i16, ptr %tmp3, align 1
883  %tmp5 = sext i16 %tmp4 to i32
884  %tmp6 = shl nuw nsw i32 %tmp5, 16
885  %tmp7 = or i32 %tmp6, %tmp2
886  ret i32 %tmp7
887}
888
889; ptr arg; i32 i;
890; p = arg + 12;
891; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; The four byte addresses are (arg + 12) + zext(i + k) for k = 0..3. The
; 'nuw' on the i32 adds lets the zexts be reassociated so the addresses are
; recognized as consecutive, folding to one i32 load at 12(arg, i).
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
  %tmp7 = load i8, ptr %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
  %tmp11 = load i8, ptr %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
  %tmp17 = load i8, ptr %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
  %tmp23 = load i8, ptr %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}
933
934; ptr arg; i32 i;
935; p = arg + 12;
936; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 13(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 13(%rax,%rdi), %eax
; CHECK64-NEXT:    retq
; Same as load_i32_by_i8_base_offset_index, but the byte indices start at
; i + 1, so the combined i32 load sits at displacement 13 instead of 12.
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
  %tmp29 = load i8, ptr %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
  %tmp33 = load i8, ptr %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
  %tmp39 = load i8, ptr %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
  %tmp45 = load i8, ptr %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}
979
980; ptr arg; i32 i;
981;
982; p0 = arg;
983; p1 = arg + i + 1;
984; p2 = arg + i + 2;
985; p3 = arg + i + 3;
986;
987; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
988;
; This test exercises zero and any extend loads as a part of load combine pattern.
990; In order to fold the pattern above we need to reassociate the address computation
; first. By the time the address computation is reassociated loads are combined
; to zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(ptr %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; Each address is written as (arg + zext(arg1 + k)) + 12 for k = 1..3 while
; the first byte uses (arg + 12) + zext(arg1); after reassociation all four
; are consecutive, so the bytes fold to one i32 load at 12(arg, arg1).
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
  %tmp38 = load i8, ptr %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
  %tmp43 = load i8, ptr %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
  %tmp48 = load i8, ptr %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}
1037
1038; The same as load_i32_by_i8_zaext_loads but the last load is combined to
1039; a sext load.
1040;
1041; ptr arg; i32 i;
1042;
1043; p0 = arg;
1044; p1 = arg + i + 1;
1045; p2 = arg + i + 2;
1046; p3 = arg + i + 3;
1047;
1048; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(ptr %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; Identical address pattern to load_i32_by_i8_zaext_loads, except the last
; byte goes through sext i8 -> i16 -> zext i16 -> i32. The shl by 24 shifts
; out the sign bits, so the combine to a single i32 load still fires.
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
  %tmp38 = load i8, ptr %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
  %tmp43 = load i8, ptr %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
  %tmp48 = load i8, ptr %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}
1094
1095; ptr p;
1096; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl (%eax), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl (%rdi), %eax
; CHECK64-NEXT:    retq
; Only the low two bytes of the i32 are populated, so the pair combines into
; a zero-extending i16 load (movzwl).
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
1117
1118; ptr p;
1119; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Negative test: the byte pattern starts at bit 8 rather than bit 0, so the
; checks expect the two byte loads and shifts to remain uncombined.
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
1149
1150; ptr p;
1151; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Negative test: like zext_load_i32_by_i8_shl_8 but the pattern starts at
; bit 16, so no load combining is expected.
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
1181
1182; ptr p;
1183; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
; BSWAP-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movzwl (%eax), %eax
; BSWAP-NEXT:    rolw $8, %ax
; BSWAP-NEXT:    movzwl %ax, %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbew (%eax), %ax
; MOVBE-NEXT:    movzwl %ax, %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movzwl (%rdi), %eax
; BSWAP64-NEXT:    rolw $8, %ax
; BSWAP64-NEXT:    movzwl %ax, %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbew (%rdi), %ax
; MOVBE64-NEXT:    movzwl %ax, %eax
; MOVBE64-NEXT:    retq
; The two bytes form a big-endian i16 in the low bits of the i32, so the
; checks expect an i16 load plus a 16-bit byte swap (rolw $8, or movbew with
; +movbe), zero-extended to 32 bits.
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
1221
1222; ptr p;
1223; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Negative test: big-endian byte order with the pattern starting at bit 8,
; so the checks expect no load combining.
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
1253
1254; ptr p;
1255; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Negative test: big-endian byte order with the pattern starting at bit 16,
; so the checks expect no load combining.
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
1285
define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
; CHECK-LABEL: pr80911_vector_load_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl (%edx), %esi
; CHECK-NEXT:    movzwl (%edx), %eax
; CHECK-NEXT:    movl $0, (%ecx)
; CHECK-NEXT:    movl %esi, (%edx)
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: pr80911_vector_load_multiuse:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %ecx
; CHECK64-NEXT:    movzwl (%rdi), %eax
; CHECK64-NEXT:    movl $0, (%rsi)
; CHECK64-NEXT:    movl %ecx, (%rdi)
; CHECK64-NEXT:    retq
; PR80911: the extracted low two bytes are folded into a movzwl from %ptr even
; though the vector load has another use. Per the checks, the scalar load is
; placed BEFORE the store through %clobber, so it reads the pre-clobber memory
; state that the vector load observed.
  %load = load <4 x i8>, ptr %ptr, align 16
  store i32 0, ptr %clobber
  store <4 x i8> %load, ptr %ptr, align 16
  %e1 = extractelement <4 x i8> %load, i64 1
  %e1.ext = zext i8 %e1 to i32
  %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
  %e0 = extractelement <4 x i8> %load, i64 0
  %e0.ext = zext i8 %e0 to i32
  %res = or i32 %e1.ext.shift, %e0.ext
  ret i32 %res
}
1317