xref: /llvm-project/llvm/test/CodeGen/X86/rotate-extract.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
4
5; Check that under certain conditions we can factor out a rotate
6; from the following idioms:
7;   (a*c0) >> s1 | (a*c1)
8;   (a/c0) << s1 | (a/c1)
9; This targets cases where instcombine has folded a shl/srl/mul/udiv
10; with one of the shifts from the rotate idiom
11
12define i64 @rolq_extract_shl(i64 %i) nounwind {
13; X86-LABEL: rolq_extract_shl:
14; X86:       # %bb.0:
15; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
16; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
17; X86-NEXT:    movl %eax, %ecx
18; X86-NEXT:    shldl $3, %edx, %ecx
19; X86-NEXT:    shll $3, %eax
20; X86-NEXT:    shll $3, %edx
21; X86-NEXT:    shrdl $25, %edx, %eax
22; X86-NEXT:    shrdl $25, %ecx, %edx
23; X86-NEXT:    retl
24;
25; X64-LABEL: rolq_extract_shl:
26; X64:       # %bb.0:
27; X64-NEXT:    leaq (,%rdi,8), %rax
28; X64-NEXT:    rolq $7, %rax
29; X64-NEXT:    retq
; Both shifts share the common factor shl 3:
;   (i<<3)>>57 | (i<<10)  ==  rol64(i<<3, 7)
; so 64-bit codegen emits one shift-by-3 (via lea scale 8) plus a single rotate.
30  %lhs_mul = shl i64 %i, 3
31  %rhs_mul = shl i64 %i, 10
32  %lhs_shift = lshr i64 %lhs_mul, 57
33  %out = or i64 %lhs_shift, %rhs_mul
34  ret i64 %out
35}
36
37define i16 @rolw_extract_shrl(i16 %i) nounwind {
38; X86-LABEL: rolw_extract_shrl:
39; X86:       # %bb.0:
40; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
41; X86-NEXT:    shrl $3, %eax
42; X86-NEXT:    rolw $12, %ax
43; X86-NEXT:    # kill: def $ax killed $ax killed $eax
44; X86-NEXT:    retl
45;
46; X64-LABEL: rolw_extract_shrl:
47; X64:       # %bb.0:
48; X64-NEXT:    movzwl %di, %eax
49; X64-NEXT:    shrl $3, %eax
50; X64-NEXT:    rolw $12, %ax
51; X64-NEXT:    # kill: def $ax killed $ax killed $eax
52; X64-NEXT:    retq
; i>>7 == (i>>3)>>4, so out == rol16(i>>3, 12): one shift plus a rolw.
53  %lhs_div = lshr i16 %i, 7
54  %rhs_div = lshr i16 %i, 3
55  %rhs_shift = shl i16 %rhs_div, 12
56  %out = or i16 %lhs_div, %rhs_shift
57  ret i16 %out
58}
59
60define i32 @roll_extract_mul(i32 %i) nounwind {
61; X86-LABEL: roll_extract_mul:
62; X86:       # %bb.0:
63; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
64; X86-NEXT:    leal (%eax,%eax,8), %eax
65; X86-NEXT:    roll $7, %eax
66; X86-NEXT:    retl
67;
68; X64-LABEL: roll_extract_mul:
69; X64:       # %bb.0:
70; X64-NEXT:    # kill: def $edi killed $edi def $rdi
71; X64-NEXT:    leal (%rdi,%rdi,8), %eax
72; X64-NEXT:    roll $7, %eax
73; X64-NEXT:    retq
; 1152 == 9*128, so (9*i)>>25 | (9*i)<<7 == rol32(9*i, 7): one multiply
; (lea with base + scale-8 index) plus a single rotate.
74  %lhs_mul = mul i32 %i, 9
75  %rhs_mul = mul i32 %i, 1152
76  %lhs_shift = lshr i32 %lhs_mul, 25
77  %out = or i32 %lhs_shift, %rhs_mul
78  ret i32 %out
79}
80
81define i8 @rolb_extract_udiv(i8 %i) nounwind {
82; X86-LABEL: rolb_extract_udiv:
83; X86:       # %bb.0:
84; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
85; X86-NEXT:    imull $171, %eax, %eax
86; X86-NEXT:    shrl $9, %eax
87; X86-NEXT:    rolb $4, %al
88; X86-NEXT:    # kill: def $al killed $al killed $eax
89; X86-NEXT:    retl
90;
91; X64-LABEL: rolb_extract_udiv:
92; X64:       # %bb.0:
93; X64-NEXT:    movzbl %dil, %eax
94; X64-NEXT:    imull $171, %eax, %eax
95; X64-NEXT:    shrl $9, %eax
96; X64-NEXT:    rolb $4, %al
97; X64-NEXT:    # kill: def $al killed $al killed $eax
98; X64-NEXT:    retq
; 48 == 3*16, so i/48 == (i/3)>>4 and out == rol8(i/3, 4): a single
; magic-number division (imul $171, shr $9) plus a single rotate.
99  %lhs_div = udiv i8 %i, 3
100  %rhs_div = udiv i8 %i, 48
101  %lhs_shift = shl i8 %lhs_div, 4
102  %out = or i8 %lhs_shift, %rhs_div
103  ret i8 %out
104}
105
106define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
107; X86-LABEL: rolq_extract_mul_with_mask:
108; X86:       # %bb.0:
109; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
110; X86-NEXT:    leal (%eax,%eax,8), %ecx
111; X86-NEXT:    movl $9, %eax
112; X86-NEXT:    mull {{[0-9]+}}(%esp)
113; X86-NEXT:    addl %ecx, %edx
114; X86-NEXT:    shrdl $25, %eax, %edx
115; X86-NEXT:    movzbl %dl, %eax
116; X86-NEXT:    xorl %edx, %edx
117; X86-NEXT:    retl
118;
119; X64-LABEL: rolq_extract_mul_with_mask:
120; X64:       # %bb.0:
121; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
122; X64-NEXT:    rolq $7, %rax
123; X64-NEXT:    movzbl %al, %eax
124; X64-NEXT:    retq
; 1152*i == (9*i)<<7, so the or is rol64(9*i, 7) with a masked left half.
; Within the low byte (9*i)<<7 can only contribute bit 7, so the 'and 160'
; plus the 7-bit right half fold into a rotate followed by a byte zero-extend.
125  %lhs_mul = mul i64 %i, 1152
126  %rhs_mul = mul i64 %i, 9
127  %lhs_and = and i64 %lhs_mul, 160
128  %rhs_shift = lshr i64 %rhs_mul, 57
129  %out = or i64 %lhs_and, %rhs_shift
130  ret i64 %out
131}
132
133; Result would undershift
134define i64 @no_extract_shl(i64 %i) nounwind {
135; X86-LABEL: no_extract_shl:
136; X86:       # %bb.0:
137; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
138; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
139; X86-NEXT:    movl %eax, %edx
140; X86-NEXT:    shldl $10, %ecx, %edx
141; X86-NEXT:    shll $10, %ecx
142; X86-NEXT:    shrl $20, %eax
143; X86-NEXT:    andl $127, %eax
144; X86-NEXT:    orl %ecx, %eax
145; X86-NEXT:    retl
146;
147; X64-LABEL: no_extract_shl:
148; X64:       # %bb.0:
149; X64-NEXT:    movq %rdi, %rax
150; X64-NEXT:    shlq $10, %rax
151; X64-NEXT:    shrq $52, %rdi
152; X64-NEXT:    andl $127, %edi
153; X64-NEXT:    orq %rdi, %rax
154; X64-NEXT:    retq
; With x = i<<5 this is (x>>57) | (x<<5); 57+5 == 62 != 64, so the shift
; pair undershoots a rotate and must stay as shift/and/or.
155  %lhs_mul = shl i64 %i, 5
156  %rhs_mul = shl i64 %i, 10
157  %lhs_shift = lshr i64 %lhs_mul, 57
158  %out = or i64 %lhs_shift, %rhs_mul
159  ret i64 %out
160}
161
162; Result would overshift
163define i32 @no_extract_shrl(i32 %i) nounwind {
164; X86-LABEL: no_extract_shrl:
165; X86:       # %bb.0:
166; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
167; X86-NEXT:    movl %eax, %ecx
168; X86-NEXT:    shrl $9, %ecx
169; X86-NEXT:    andl $-8, %eax
170; X86-NEXT:    shll $25, %eax
171; X86-NEXT:    orl %ecx, %eax
172; X86-NEXT:    retl
173;
174; X64-LABEL: no_extract_shrl:
175; X64:       # %bb.0:
176; X64-NEXT:    movl %edi, %eax
177; X64-NEXT:    shrl $9, %eax
178; X64-NEXT:    andl $-8, %edi
179; X64-NEXT:    shll $25, %edi
180; X64-NEXT:    orl %edi, %eax
181; X64-NEXT:    retq
; With x = i>>3 this is (x<<28) | (x>>6); 28+6 == 34 != 32, so the shift
; pair overshoots a rotate and must stay as shift/and/or.
182  %lhs_div = lshr i32 %i, 3
183  %rhs_div = lshr i32 %i, 9
184  %lhs_shift = shl i32 %lhs_div, 28
185  %out = or i32 %lhs_shift, %rhs_div
186  ret i32 %out
187}
188
189; Can factor 128 from 2304, but result is 18 instead of 9
190define i16 @no_extract_mul(i16 %i) nounwind {
191; X86-LABEL: no_extract_mul:
192; X86:       # %bb.0:
193; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
194; X86-NEXT:    leal (%eax,%eax,8), %ecx
195; X86-NEXT:    shll $8, %eax
196; X86-NEXT:    leal (%eax,%eax,8), %edx
197; X86-NEXT:    movzwl %cx, %eax
198; X86-NEXT:    shrl $9, %eax
199; X86-NEXT:    orl %edx, %eax
200; X86-NEXT:    # kill: def $ax killed $ax killed $eax
201; X86-NEXT:    retl
202;
203; X64-LABEL: no_extract_mul:
204; X64:       # %bb.0:
205; X64-NEXT:    # kill: def $edi killed $edi def $rdi
206; X64-NEXT:    leal (%rdi,%rdi,8), %eax
207; X64-NEXT:    # kill: def $edi killed $edi killed $rdi def $rdi
208; X64-NEXT:    shll $8, %edi
209; X64-NEXT:    leal (%rdi,%rdi,8), %ecx
210; X64-NEXT:    movzwl %ax, %eax
211; X64-NEXT:    shrl $9, %eax
212; X64-NEXT:    orl %ecx, %eax
213; X64-NEXT:    # kill: def $ax killed $ax killed $eax
214; X64-NEXT:    retq
; 2304 == 9<<8, so lhs == (9*i)<<8, but the right half shifts by 9 and
; 8+9 != 16: no rotate forms and both products are materialized separately.
215  %lhs_mul = mul i16 %i, 2304
216  %rhs_mul = mul i16 %i, 9
217  %rhs_shift = lshr i16 %rhs_mul, 9
218  %out = or i16 %lhs_mul, %rhs_shift
219  ret i16 %out
220}
221
222; Can't evenly factor 16 from 49
223define i8 @no_extract_udiv(i8 %i) nounwind {
224; X86-LABEL: no_extract_udiv:
225; X86:       # %bb.0:
226; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
227; X86-NEXT:    imull $171, %eax, %ecx
228; X86-NEXT:    imull $79, %eax, %edx
229; X86-NEXT:    subb %dh, %al
230; X86-NEXT:    shrb %al
231; X86-NEXT:    addb %dh, %al
232; X86-NEXT:    shrb $5, %al
233; X86-NEXT:    shlb $3, %ch
234; X86-NEXT:    orb %al, %ch
235; X86-NEXT:    andb $-9, %ch
236; X86-NEXT:    movb %ch, %al
237; X86-NEXT:    retl
238;
239; X64-LABEL: no_extract_udiv:
240; X64:       # %bb.0:
241; X64-NEXT:    movzbl %dil, %ecx
242; X64-NEXT:    imull $171, %ecx, %eax
243; X64-NEXT:    shrl $8, %eax
244; X64-NEXT:    imull $79, %ecx, %edx
245; X64-NEXT:    shrl $8, %edx
246; X64-NEXT:    subb %dl, %cl
247; X64-NEXT:    shrb %cl
248; X64-NEXT:    addb %dl, %cl
249; X64-NEXT:    shrb $5, %cl
250; X64-NEXT:    shlb $3, %al
251; X64-NEXT:    orb %cl, %al
252; X64-NEXT:    andb $-9, %al
253; X64-NEXT:    # kill: def $al killed $al killed $eax
254; X64-NEXT:    retq
; 49 != 3*16, so i/49 is not (i/3)>>4 and no rotate forms; each division is
; lowered independently via a multiply-by-reciprocal sequence.
255  %lhs_div = udiv i8 %i, 3
256  %rhs_div = udiv i8 %i, 49
257  %lhs_shift = shl i8 %lhs_div,4
258  %out = or i8 %lhs_shift, %rhs_div
259  ret i8 %out
260}
261
262; DAGCombiner transforms shl X, 1 into add X, X.
263define i32 @extract_add_1(i32 %i) nounwind {
264; X86-LABEL: extract_add_1:
265; X86:       # %bb.0:
266; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
267; X86-NEXT:    roll %eax
268; X86-NEXT:    retl
269;
270; X64-LABEL: extract_add_1:
271; X64:       # %bb.0:
272; X64-NEXT:    movl %edi, %eax
273; X64-NEXT:    roll %eax
274; X64-NEXT:    retq
; i+i acts as i<<1, and (i<<1)|(i>>31) == rol32(i, 1), so the match must
; look through the add and emit a single rotate-by-one.
275  %ii = add i32 %i, %i
276  %rhs = lshr i32 %i, 31
277  %out = or i32 %ii, %rhs
278  ret i32 %out
279}
280
281define i32 @extract_add_1_comut(i32 %i) nounwind {
282; X86-LABEL: extract_add_1_comut:
283; X86:       # %bb.0:
284; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
285; X86-NEXT:    roll %eax
286; X86-NEXT:    retl
287;
288; X64-LABEL: extract_add_1_comut:
289; X64:       # %bb.0:
290; X64-NEXT:    movl %edi, %eax
291; X64-NEXT:    roll %eax
292; X64-NEXT:    retq
; Same rotate-by-one pattern with the or operands commuted: the match must
; be commutative, so a single rotate is still emitted.
293  %ii = add i32 %i, %i
294  %lhs = lshr i32 %i, 31
295  %out = or i32 %lhs, %ii
296  ret i32 %out
297}
298
299define i32 @no_extract_add_1(i32 %i) nounwind {
300; X86-LABEL: no_extract_add_1:
301; X86:       # %bb.0:
302; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
303; X86-NEXT:    leal (%eax,%eax), %ecx
304; X86-NEXT:    shrl $27, %eax
305; X86-NEXT:    orl %ecx, %eax
306; X86-NEXT:    retl
307;
308; X64-LABEL: no_extract_add_1:
309; X64:       # %bb.0:
310; X64-NEXT:    # kill: def $edi killed $edi def $rdi
311; X64-NEXT:    leal (%rdi,%rdi), %eax
312; X64-NEXT:    shrl $27, %edi
313; X64-NEXT:    orl %edi, %eax
314; X64-NEXT:    retq
; (i<<1)|(i>>27): 1+27 == 28 != 32, so this is not a rotate and must stay
; as add/shift/or.
315  %ii = add i32 %i, %i
316  %rhs = lshr i32 %i, 27
317  %out = or i32 %ii, %rhs
318  ret i32 %out
319}
320