; xref: /llvm-project/llvm/test/CodeGen/X86/legalize-shl-vec.ll (revision 9632f987161b4efeb8c087f19a3eb4f7c69cc920)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

define <2 x i256> @test_shl(<2 x i256> %In) nounwind {
; Lowering of a <2 x i256> shl by constant amounts on i686 and x86-64.
; X86-LABEL: test_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $2, %ecx, %edx
; X86-NEXT:    movl %edx, 60(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $2, %edx, %ecx
; X86-NEXT:    movl %ecx, 56(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $2, %ecx, %edx
; X86-NEXT:    movl %edx, 52(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $2, %edx, %ecx
; X86-NEXT:    movl %ecx, 48(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $2, %ecx, %edx
; X86-NEXT:    movl %edx, 44(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $2, %edx, %ecx
; X86-NEXT:    movl %ecx, 40(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $2, %ecx, %edx
; X86-NEXT:    movl %edx, 36(%eax)
; X86-NEXT:    shll $2, %ecx
; X86-NEXT:    movl %ecx, 32(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $31, %ecx
; X86-NEXT:    movl %ecx, 28(%eax)
; X86-NEXT:    movl $0, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT:    shldq $2, %rdx, %rcx
; X64-NEXT:    shldq $2, %rdi, %rdx
; X64-NEXT:    shldq $2, %r9, %rdi
; X64-NEXT:    shlq $63, %rsi
; X64-NEXT:    shlq $2, %r9
; X64-NEXT:    movq %rcx, 56(%rax)
; X64-NEXT:    movq %rdx, 48(%rax)
; X64-NEXT:    movq %rdi, 40(%rax)
; X64-NEXT:    movq %r9, 32(%rax)
; X64-NEXT:    movq %rsi, 24(%rax)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, (%rax)
; X64-NEXT:    movq $0, 16(%rax)
; X64-NEXT:    retq
; Shift amounts are the constants <255, 2>: lane 1's shift-by-2 lowers to the
; shld $2 carry chain above; lane 0's shift-by-255 keeps only input bit 0 in
; the lane's top word (shll $31 / shlq $63) and zeroes everything below it.
  %Amt = insertelement <2 x i256> <i256 1, i256 2>, i256 255, i32 0
  %Out = shl <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}
define <2 x i256> @test_srl(<2 x i256> %In) nounwind {
; Lowering of a <2 x i256> lshr by constant amounts on i686 and x86-64.
; X86-LABEL: test_srl:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    shldl $28, %edx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shldl $28, %ebx, %edx
; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X86-NEXT:    shldl $28, %ecx, %ebx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    shldl $28, %edi, %esi
; X86-NEXT:    shldl $28, %eax, %edi
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $28, %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrdl $4, %eax, %ecx
; X86-NEXT:    shrl $4, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ebp, 60(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT:    movl %ebp, 56(%eax)
; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
; X86-NEXT:    movl %ebp, 52(%eax)
; X86-NEXT:    movl %ebx, 48(%eax)
; X86-NEXT:    movl %esi, 44(%eax)
; X86-NEXT:    movl %edi, 40(%eax)
; X86-NEXT:    movl %edx, 36(%eax)
; X86-NEXT:    movl %ecx, 32(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrl $31, %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl $0, 28(%eax)
; X86-NEXT:    movl $0, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
;
; X64-LABEL: test_srl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT:    shrdq $4, %rsi, %r9
; X64-NEXT:    shrdq $4, %rdx, %rsi
; X64-NEXT:    shrdq $4, %rcx, %rdx
; X64-NEXT:    shrq $63, %r8
; X64-NEXT:    shrq $4, %rcx
; X64-NEXT:    movq %rcx, 56(%rdi)
; X64-NEXT:    movq %rdx, 48(%rdi)
; X64-NEXT:    movq %rsi, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    movq %r8, (%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, 16(%rdi)
; X64-NEXT:    movq $0, 8(%rdi)
; X64-NEXT:    retq
; Shift amounts are the constants <255, 4>: lane 1's shift-by-4 lowers to the
; shld/shrd $4 chain above; lane 0's shift-by-255 leaves only input bit 255 in
; the lane's low word (shrl $31 / shrq $63) and zeroes everything above it.
  %Amt = insertelement <2 x i256> <i256 3, i256 4>, i256 255, i32 0
  %Out = lshr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}
define <2 x i256> @test_sra(<2 x i256> %In) nounwind {
; Lowering of a <2 x i256> ashr by constant amounts on i686 and x86-64.
; X86-LABEL: test_sra:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    shldl $26, %edx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shldl $26, %ebx, %edx
; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X86-NEXT:    shldl $26, %ecx, %ebx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    shldl $26, %edi, %esi
; X86-NEXT:    shldl $26, %eax, %edi
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $26, %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrdl $6, %eax, %ecx
; X86-NEXT:    sarl $6, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ebp, 60(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT:    movl %ebp, 56(%eax)
; X86-NEXT:    movl (%esp), %ebp # 4-byte Reload
; X86-NEXT:    movl %ebp, 52(%eax)
; X86-NEXT:    movl %ebx, 48(%eax)
; X86-NEXT:    movl %esi, 44(%eax)
; X86-NEXT:    movl %edi, 40(%eax)
; X86-NEXT:    movl %edx, 36(%eax)
; X86-NEXT:    movl %ecx, 32(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    sarl $31, %ecx
; X86-NEXT:    movl %ecx, 28(%eax)
; X86-NEXT:    movl %ecx, 24(%eax)
; X86-NEXT:    movl %ecx, 20(%eax)
; X86-NEXT:    movl %ecx, 16(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl %ecx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
;
; X64-LABEL: test_sra:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT:    shrdq $6, %rsi, %r9
; X64-NEXT:    shrdq $6, %rdx, %rsi
; X64-NEXT:    shrdq $6, %rcx, %rdx
; X64-NEXT:    sarq $63, %r8
; X64-NEXT:    sarq $6, %rcx
; X64-NEXT:    movq %rcx, 56(%rdi)
; X64-NEXT:    movq %rdx, 48(%rdi)
; X64-NEXT:    movq %rsi, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    movq %r8, 24(%rdi)
; X64-NEXT:    movq %r8, 16(%rdi)
; X64-NEXT:    movq %r8, 8(%rdi)
; X64-NEXT:    movq %r8, (%rdi)
; X64-NEXT:    retq
; Shift amounts are the constants <255, 6>: lane 1's shift-by-6 lowers to the
; shld/shrd $6 chain with an arithmetic sarl/sarq $6 on the top word; lane 0's
; shift-by-255 broadcasts the sign bit (sarl $31 / sarq $63) across the lane.
  %Amt = insertelement <2 x i256> <i256 5, i256 6>, i256 255, i32 0
  %Out = ashr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}