; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64

; Optimize an expanded SRL/SHL used as an input to a SETCC comparing it
; with zero by removing the rotation.
;
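; A sketch of the idea on the i128 tests below, where %a is split into
; 64-bit halves {hi, lo}:
;   (lshr i128 %a, 17) == 0
;     <=>  ((lo >> 17) | (hi << 47)) == 0  and  (hi >> 17) == 0
;     <=>  ((lo >> 17) | hi) == 0
; since (hi << 47) and (hi >> 17) together cover every bit of hi, the
; SHRD/SHLD (rotate-like) parts of the expansion can be dropped.
;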
; See https://bugs.llvm.org/show_bug.cgi?id=50197
define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB0_1: # %loop
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    addl $1, %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    adcl $0, %edx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    orl %ecx, %ebx
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    orl %edx, %ebp
; X86-NEXT:    orl %ecx, %ebp
; X86-NEXT:    shrdl $28, %ebx, %ebp
; X86-NEXT:    jne .LBB0_1
; X86-NEXT:  # %bb.2: # %exit
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
;
; X64-LABEL: opt_setcc_lt_power_of_2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rdx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    .p2align 4
; X64-NEXT:  .LBB0_1: # %loop
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    addq $1, %rax
; X64-NEXT:    adcq $0, %rdx
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $60, %rcx
; X64-NEXT:    orq %rdx, %rcx
; X64-NEXT:    jne .LBB0_1
; X64-NEXT:  # %bb.2: # %exit
; X64-NEXT:    retq
  br label %loop

loop:
  %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
  %inc = add i128 %phi.a, 1
  %cmp = icmp ult i128 %inc, 1152921504606846976 ; i.e. 1 << 60
  br i1 %cmp, label %exit, label %loop

exit:
  ret i128 %inc
}

define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    shldl $15, %edx, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64:       # %bb.0:
; X64-NEXT:    shrq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %srl = lshr i128 %a, 17
  %cmp = icmp eq i128 %srl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    shldl $15, %edx, %ecx
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64:       # %bb.0:
; X64-NEXT:    shrq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %srl = lshr i128 %a, 17
  %cmp = icmp ne i128 %srl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp ne i128 %shl, 0
  ret i1 %cmp
}

; Negative test: the optimization should not be applied if the shift has
; multiple users.
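; Folding the compare would not remove the expanded shift here: %shl must
; still be fully materialized for the call to @use, so the SHLD/SHL sequence
; is emitted either way and the fold would save nothing.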
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shldl $17, %edx, %esi
; X86-NEXT:    shldl $17, %ecx, %edx
; X86-NEXT:    shldl $17, %eax, %ecx
; X86-NEXT:    shll $17, %eax
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    orl %esi, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    orl %edx, %ebx
; X86-NEXT:    orl %edi, %ebx
; X86-NEXT:    sete %bl
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %edx
; X86-NEXT:    pushl %ecx
; X86-NEXT:    pushl %eax
; X86-NEXT:    calll use@PLT
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    shldq $17, %rdi, %rsi
; X64-NEXT:    shlq $17, %rdi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    orq %rsi, %rax
; X64-NEXT:    sete %bl
; X64-NEXT:    callq use@PLT
; X64-NEXT:    movl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  call void @use(i128 %shl)
  ret i1 %cmp
}

; Check that the optimization is applied to a DAG of the appropriate shape
; even when it did not arise from an actual shift expansion.
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 17
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

; Negative test: the optimization should not be applied because the shift
; amounts do not match (17 vs. 18).
define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shldl $17, %edx, %esi
; X86-NEXT:    shldl $17, %ecx, %edx
; X86-NEXT:    shldl $18, %eax, %ecx
; X86-NEXT:    shll $18, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    orl %esi, %ecx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X64:       # %bb.0:
; X64-NEXT:    shldq $17, %rsi, %rdi
; X64-NEXT:    shlq $18, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 18
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

declare void @use(i128 %a)