; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64

define i32 @t1(ptr %X, i32 %i) {
; X86-LABEL: t1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl %cl, %ecx
; X86-NEXT:    movl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movl (%rdi,%rax,4), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, 1020
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i32, ptr %tmp7
  ret i32 %tmp9
}

define i32 @t2(ptr %X, i32 %i) {
; X86-LABEL: t2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl %cx, %ecx
; X86-NEXT:    movl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movl (%rdi,%rax,4), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 1
  %tmp4 = and i32 %tmp2, 131070
  %tmp7 = getelementptr i16, ptr %X, i32 %tmp4
  %tmp9 = load i32, ptr %tmp7
  ret i32 %tmp9
}

; This case is tricky. The lshr followed by a gep will produce a lshr followed
; by an and to remove the low bits. This can be simplified by doing the lshr by
; a greater constant and using the addressing mode to scale the result back up.
; To make matters worse, because of the two-phase zext of %i and their reuse in
; the function, the DAG can get confusing trying to re-use both of them and
; prevent easy analysis of the mask in order to match this.
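;
; As a rough sketch of the intended fold (illustrative only, not part of the
; checked output): the byte offset computed below is ((zext %i) >> 11) * 4.
; Folding the scale into the shift would instead produce ((zext %i) >> 9) & -4,
; which hides the shift amount behind a mask and costs an extra 'and'; keeping
; the shift at 11 and letting the scale-4 addressing mode (base,index,4) supply
; the multiply by 4 is what the CHECK lines expect.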
define i32 @t3(ptr %i.ptr, ptr %arr) {
; X86-LABEL: t3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl $11, %edx
; X86-NEXT:    addl (%ecx,%edx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrl $11, %ecx
; X64-NEXT:    addl (%rsi,%rcx,4), %eax
; X64-NEXT:    retq
entry:
  %i = load i16, ptr %i.ptr
  %i.zext = zext i16 %i to i32
  %index = lshr i32 %i.zext, 11
  %val.ptr = getelementptr inbounds i32, ptr %arr, i32 %index
  %val = load i32, ptr %val.ptr
  %sum = add i32 %val, %i.zext
  ret i32 %sum
}

; A version of @t3 that has more zero extends and more re-use of intermediate
; values. This exercises slightly different bits of canonicalization.
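;
; Illustrative note (not part of the checked output): the shifted index is
; additionally zero-extended to i64 for the gep while the i32 value is reused
; in the final add, so the zext of (%i.zext >> 11) from i32 to i64 has to fold
; away; on x86-64 the 32-bit shrl already leaves the high bits of %rcx zero,
; which is why the same shrl-plus-(%rsi,%rcx,4) pattern is still expected.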
define i32 @t4(ptr %i.ptr, ptr %arr) {
; X86-LABEL: t4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl $11, %edx
; X86-NEXT:    addl (%ecx,%edx,4), %eax
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrl $11, %ecx
; X64-NEXT:    addl (%rsi,%rcx,4), %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
entry:
  %i = load i16, ptr %i.ptr
  %i.zext = zext i16 %i to i32
  %index = lshr i32 %i.zext, 11
  %index.zext = zext i32 %index to i64
  %val.ptr = getelementptr inbounds i32, ptr %arr, i64 %index.zext
  %val = load i32, ptr %val.ptr
  %sum.1 = add i32 %val, %i.zext
  %sum.2 = add i32 %sum.1, %index
  ret i32 %sum.2
}

define i8 @t5(ptr %X, i32 %i) {
; X86-LABEL: t5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $-14, %ecx
; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    andl $-56, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movzbl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, -56
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i8, ptr %tmp7
  ret i8 %tmp9
}

define i8 @t6(ptr %X, i32 %i) {
; X86-LABEL: t6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-255, %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    andl $-1020, %esi # imm = 0xFC04
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movzbl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, -1020
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i8, ptr %tmp7
  ret i8 %tmp9
}