; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64

@array = weak dso_local global [4 x i32] zeroinitializer

define dso_local i32 @test_lshr_and(i32 %x) {
; X86-LABEL: test_lshr_and:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $12, %eax
; X86-NEXT:    movl array(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_lshr_and:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    andl $12, %edi
; X64-NEXT:    movl array(%rdi), %eax
; X64-NEXT:    retq
  %tmp2 = lshr i32 %x, 2
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr [4 x i32], ptr @array, i32 0, i32 %tmp3
  %tmp5 = load i32, ptr %tmp4, align 4
  ret i32 %tmp5
}

define dso_local ptr @test_exact1(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact1:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $3, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact2(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact2:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $3, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact3(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact3:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $2, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 2
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact4(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact4:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact5(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact5:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact6(i32 %a, i32 %b, ptr %x)  {
; X86-LABEL: test_exact6:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact6:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 2
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

; PR42644 - https://bugs.llvm.org/show_bug.cgi?id=42644
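; (Informal note on the first case below, assuming wrap-around i64 arithmetic:
; (shl %r, 32) + (1 << 32) is ((%r + 1) << 32), so the trailing (ashr ..., 32)
; is just the low 32 bits of (%r + 1), sign-extended back to i64, as the
; checked output reflects.)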

define i64 @ashr_add_shl_i32(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    incl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32:
; X64:       # %bb.0:
; X64-NEXT:    incl %edi
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 32
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

define i64 @ashr_add_shl_i8(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addb $2, %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i8:
; X64:       # %bb.0:
; X64-NEXT:    addb $2, %dil
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 56
  %sext = add i64 %conv, 144115188075855872
  %conv1 = ashr i64 %sext, 56
  ret i64 %conv1
}

define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
; X86-LABEL: ashr_add_shl_v4i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
; X86-NEXT:    incb %dh
; X86-NEXT:    movsbl %dh, %esi
; X86-NEXT:    incb %ch
; X86-NEXT:    movsbl %ch, %edi
; X86-NEXT:    incb %dl
; X86-NEXT:    movsbl %dl, %edx
; X86-NEXT:    incb %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %edi, 4(%eax)
; X86-NEXT:    movl %esi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: ashr_add_shl_v4i8:
; X64:       # %bb.0:
; X64-NEXT:    pslld $24, %xmm0
; X64-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    psrad $24, %xmm0
; X64-NEXT:    retq
  %conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>
  %sext = add <4 x i32> %conv, <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
  %conv1 = ashr <4 x i32> %sext, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %conv1
}

define i64 @ashr_add_shl_i36(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i36:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    sarl $4, %eax
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i36:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $36, %rax
; X64-NEXT:    sarq $36, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 36
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 36
  ret i64 %conv1
}

define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_mismatch_shifts1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    incl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_mismatch_shifts1:
; X64:       # %bb.0:
; X64-NEXT:    shlq $8, %rdi
; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    sarq $32, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 8
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

define i64 @ashr_add_shl_mismatch_shifts2(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_mismatch_shifts2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $8, %edx
; X86-NEXT:    incl %edx
; X86-NEXT:    shrdl $8, %edx, %eax
; X86-NEXT:    shrl $8, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_mismatch_shifts2:
; X64:       # %bb.0:
; X64-NEXT:    shrq $8, %rdi
; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    shrq $8, %rax
; X64-NEXT:    retq
  %conv = lshr i64 %r, 8
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 8
  ret i64 %conv1
}

define dso_local i32 @ashr_add_shl_i32_i8_extra_use1(i32 %r, ptr %p) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  %sext = add i32 %conv, 33554432
  store i32 %sext, ptr %p
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

define dso_local i32 @ashr_add_shl_i32_i8_extra_use2(i32 %r, ptr %p) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use2:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    movl %edi, (%rsi)
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  store i32 %conv, ptr %p
  %sext = add i32 %conv, 33554432
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

define dso_local i32 @ashr_add_shl_i32_i8_extra_use3(i32 %r, ptr %p1, ptr %p2) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    movl %eax, (%edx)
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use3:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    movl %edi, (%rsi)
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    movl %eax, (%rdx)
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  store i32 %conv, ptr %p1
  %sext = add i32 %conv, 33554432
  store i32 %sext, ptr %p2
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

%"class.QPainterPath" = type { double, double, i32 }

define dso_local void @PR42880(i32 %t0) {
; X86-LABEL: PR42880:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testb %al, %al
; X86-NEXT:    je .LBB16_1
; X86-NEXT:  # %bb.2: # %if
; X86-NEXT:  .LBB16_1: # %then
;
; X64-LABEL: PR42880:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je .LBB16_1
; X64-NEXT:  # %bb.2: # %if
; X64-NEXT:  .LBB16_1: # %then
  %sub = add nsw i32 %t0, -1
  %add.ptr.i94 = getelementptr inbounds %"class.QPainterPath", ptr null, i32 %sub
  %x = ptrtoint ptr %add.ptr.i94 to i32
  %sub2 = sub i32 %x, 0
  %div = sdiv exact i32 %sub2, 24
  br i1 poison, label %if, label %then

then:
  %t1 = xor i32 %div, -1
  unreachable

if:
  unreachable
}

; The mul here is the equivalent of (neg (shl X, 32)).
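; (Informal expansion: -4294967296 is -(1 << 32), so %conv is -(%r << 32),
; %sext is ((1 - %r) << 32) in wrap-around i64 arithmetic, and the final ashr
; yields the low 32 bits of (1 - %r), sign-extended, matching the checks.)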
define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_neg_shl_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    cltq
; X64-NEXT:    retq
  %conv = mul i64 %r, -4294967296
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

; The mul here is the equivalent of (neg (shl X, 56)).
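; (Similarly, -72057594037927936 is -(1 << 56) and the add constant is 2 << 56,
; so the result is the sign extension of (2 - low byte of %r).)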
define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb $2, %al
; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_neg_shl_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb $2, %al
; X64-NEXT:    subb %dil, %al
; X64-NEXT:    movsbq %al, %rax
; X64-NEXT:    retq
  %conv = mul i64 %r, -72057594037927936
  %sext = add i64 %conv, 144115188075855872
  %conv1 = ashr i64 %sext, 56
  ret i64 %conv1
}

; The mul here is the equivalent of (neg (shl X, 24)).
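; (Here -16777216 is -(1 << 24) and the add splat is 1 << 24, so each lane
; becomes the sign extension of (1 - low byte of that lane).)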
define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_v4i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $1, %cl
; X86-NEXT:    movb $1, %dl
; X86-NEXT:    subb {{[0-9]+}}(%esp), %dl
; X86-NEXT:    movsbl %dl, %edx
; X86-NEXT:    movb $1, %ch
; X86-NEXT:    subb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movsbl %ch, %esi
; X86-NEXT:    movb $1, %ch
; X86-NEXT:    subb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movsbl %ch, %edi
; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: ashr_add_neg_shl_v4i8:
; X64:       # %bb.0:
; X64-NEXT:    pslld $24, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16777216,16777216,16777216,16777216]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    psrad $24, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %conv = mul <4 x i32> %r, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
  %sext = add <4 x i32> %conv, <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
  %conv1 = ashr <4 x i32> %sext, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %conv1
}

define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: or_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: or_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    orl %edx, %edi
; X64-NEXT:    shll $16, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 16
  %c.shifted = shl i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: xor_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: xor_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    xorl %edx, %edi
; X64-NEXT:    shrl $16, %edi
; X64-NEXT:    xorl %ecx, %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = lshr i32 %a, 16
  %c.shifted = lshr i32 %c, 16
  %xor.ab = xor i32 %a.shifted, %b
  %xor.cd = xor i32 %d, %c.shifted
  %r = xor i32 %xor.ab, %xor.cd
  ret i32 %r
}

define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: and_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    andl %edx, %edi
; X64-NEXT:    sarl $16, %edi
; X64-NEXT:    andl %ecx, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = ashr i32 %a, 16
  %c.shifted = ashr i32 %c, 16
  %and.ab = and i32 %b, %a.shifted
  %and.cd = and i32 %c.shifted, %d
  %r = and i32 %and.ab, %and.cd
  ret i32 %r
}

define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
; X86-LABEL: logic_tree_with_shifts_var_i32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_shifts_var_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    orl %edx, %edi
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shll %cl, %edi
; X64-NEXT:    orl %esi, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, %s
  %c.shifted = shl i32 %c, %s
  %or.ab = or i32 %b, %a.shifted
  %or.cd = or i32 %d, %c.shifted
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: logic_tree_with_mismatching_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $15, %ecx
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_mismatching_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %eax
; X64-NEXT:    shll $15, %edi
; X64-NEXT:    shll $16, %eax
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 15
  %c.shifted = shl i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: logic_tree_with_mismatching_shifts2_i32:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $16, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_mismatching_shifts2_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %eax
; X64-NEXT:    shll $16, %edi
; X64-NEXT:    shrl $16, %eax
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 16
  %c.shifted = lshr i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; X86-LABEL: or_tree_with_shifts_vec_i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $16, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $16, %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shll $16, %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    shll $16, %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl $4
;
; X64-LABEL: or_tree_with_shifts_vec_i32:
; X64:       # %bb.0:
; X64-NEXT:    por %xmm2, %xmm0
; X64-NEXT:    pslld $16, %xmm0
; X64-NEXT:    por %xmm3, %xmm1
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    retq
  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
  %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %or.ab = or <4 x i32> %a.shifted, %b
  %or.cd = or <4 x i32> %c.shifted, %d
  %r = or <4 x i32> %or.ab, %or.cd
  ret <4 x i32> %r
}

define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; X86-LABEL: or_tree_with_mismatching_shifts_vec_i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    orl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl $4
;
; X64-LABEL: or_tree_with_mismatching_shifts_vec_i32:
; X64:       # %bb.0:
; X64-NEXT:    pslld $16, %xmm0
; X64-NEXT:    pslld $17, %xmm2
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    por %xmm3, %xmm2
; X64-NEXT:    por %xmm2, %xmm0
; X64-NEXT:    retq
  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
  %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
  %or.ab = or <4 x i32> %a.shifted, %b
  %or.cd = or <4 x i32> %c.shifted, %d
  %r = or <4 x i32> %or.ab, %or.cd
  ret <4 x i32> %r
}

; Reproducer for a DAGCombiner::combineShiftOfShiftedLogic bug. DAGCombiner
; needs to check that the sum of the shift amounts fits in i8, which is the
; legal type used to describe X86 shift amounts. Verify that we do not try to
; create a shift with 130+160 as the shift amount, and verify that the stored
; value does not depend on %a1.
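; (For reference: 130 + 160 = 290, which does not fit in i8; naively adding the
; shift amounts in the i8 shift-amount type would wrap to 34.)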
define void @combineShiftOfShiftedLogic(i128 %a1, i32 %a2, ptr %p) {
; X86-LABEL: combineShiftOfShiftedLogic:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, 20(%ecx)
; X86-NEXT:    movl $0, 16(%ecx)
; X86-NEXT:    movl $0, 12(%ecx)
; X86-NEXT:    movl $0, 8(%ecx)
; X86-NEXT:    movl $0, 4(%ecx)
; X86-NEXT:    movl $0, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: combineShiftOfShiftedLogic:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edx killed $edx def $rdx
; X64-NEXT:    shlq $32, %rdx
; X64-NEXT:    movq %rdx, 16(%rcx)
; X64-NEXT:    movq $0, 8(%rcx)
; X64-NEXT:    movq $0, (%rcx)
; X64-NEXT:    retq
  %zext1 = zext i128 %a1 to i192
  %zext2 = zext i32 %a2 to i192
  %shl = shl i192 %zext1, 130
  %or = or i192 %shl, %zext2
  %res = shl i192 %or, 160
  store i192 %res, ptr %p, align 8
  ret void
}