xref: /llvm-project/llvm/test/CodeGen/X86/x86-shifts.ll (revision 74fe1da01eb149a2234fc0f9570c84a08692e782)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
4
5; Splat patterns below
6
; shl4 - <4 x i32> shift-left by splat constants.
; Xors %A shifted left by 2 with %A shifted left by 1. The expected assembly
; uses pslld for the shift by 2 and a self-add (paddd) for the shift by 1.
7define <4 x i32> @shl4(<4 x i32> %A) nounwind {
8; CHECK-LABEL: shl4:
9; CHECK:       # %bb.0: # %entry
10; CHECK-NEXT:    movdqa %xmm0, %xmm1
11; CHECK-NEXT:    pslld $2, %xmm1
12; CHECK-NEXT:    paddd %xmm0, %xmm0
13; CHECK-NEXT:    pxor %xmm1, %xmm0
14; CHECK-NEXT:    ret{{[l|q]}}
15entry:
16  %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
17  %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
18  %K = xor <4 x i32> %B, %C
19  ret <4 x i32> %K
20}
21
; shr4 - <4 x i32> logical shift-right by splat constants.
; Xors %A >> 2 with %A >> 1; both shifts are expected as psrld with an
; immediate count.
22define <4 x i32> @shr4(<4 x i32> %A) nounwind {
23; CHECK-LABEL: shr4:
24; CHECK:       # %bb.0: # %entry
25; CHECK-NEXT:    movdqa %xmm0, %xmm1
26; CHECK-NEXT:    psrld $2, %xmm1
27; CHECK-NEXT:    psrld $1, %xmm0
28; CHECK-NEXT:    pxor %xmm1, %xmm0
29; CHECK-NEXT:    ret{{[l|q]}}
30entry:
31  %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
32  %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
33  %K = xor <4 x i32> %B, %C
34  ret <4 x i32> %K
35}
36
; sra4 - <4 x i32> arithmetic shift-right by splat constants.
; Xors the ashr-by-2 and ashr-by-1 results; both shifts are expected as psrad
; with an immediate count.
37define <4 x i32> @sra4(<4 x i32> %A) nounwind {
38; CHECK-LABEL: sra4:
39; CHECK:       # %bb.0: # %entry
40; CHECK-NEXT:    movdqa %xmm0, %xmm1
41; CHECK-NEXT:    psrad $2, %xmm1
42; CHECK-NEXT:    psrad $1, %xmm0
43; CHECK-NEXT:    pxor %xmm1, %xmm0
44; CHECK-NEXT:    ret{{[l|q]}}
45entry:
46  %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
47  %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
48  %K = xor <4 x i32> %B, %C
49  ret <4 x i32> %K
50}
51
; shl2 - <2 x i64> shift-left by splat constants.
; Xors %A << 2 with %A << 9; both shifts are expected as psllq with an
; immediate count.
52define <2 x i64> @shl2(<2 x i64> %A) nounwind {
53; CHECK-LABEL: shl2:
54; CHECK:       # %bb.0: # %entry
55; CHECK-NEXT:    movdqa %xmm0, %xmm1
56; CHECK-NEXT:    psllq $2, %xmm1
57; CHECK-NEXT:    psllq $9, %xmm0
58; CHECK-NEXT:    pxor %xmm1, %xmm0
59; CHECK-NEXT:    ret{{[l|q]}}
60entry:
61  %B = shl <2 x i64> %A,  < i64 2, i64 2>
62  %C = shl <2 x i64> %A,  < i64 9, i64 9>
63  %K = xor <2 x i64> %B, %C
64  ret <2 x i64> %K
65}
66
; shr2 - <2 x i64> logical shift-right by splat constants.
; Xors %A >> 8 with %A >> 1; both shifts are expected as psrlq with an
; immediate count.
67define <2 x i64> @shr2(<2 x i64> %A) nounwind {
68; CHECK-LABEL: shr2:
69; CHECK:       # %bb.0: # %entry
70; CHECK-NEXT:    movdqa %xmm0, %xmm1
71; CHECK-NEXT:    psrlq $8, %xmm1
72; CHECK-NEXT:    psrlq $1, %xmm0
73; CHECK-NEXT:    pxor %xmm1, %xmm0
74; CHECK-NEXT:    ret{{[l|q]}}
75entry:
76  %B = lshr <2 x i64> %A,  < i64 8, i64 8>
77  %C = lshr <2 x i64> %A,  < i64 1, i64 1>
78  %K = xor <2 x i64> %B, %C
79  ret <2 x i64> %K
80}
81
; shl8 - <8 x i16> shift-left by splat constants.
; Xors %A << 2 with %A << 1. The expected assembly uses psllw for the shift
; by 2 and a self-add (paddw) for the shift by 1.
82define <8 x i16> @shl8(<8 x i16> %A) nounwind {
83; CHECK-LABEL: shl8:
84; CHECK:       # %bb.0: # %entry
85; CHECK-NEXT:    movdqa %xmm0, %xmm1
86; CHECK-NEXT:    psllw $2, %xmm1
87; CHECK-NEXT:    paddw %xmm0, %xmm0
88; CHECK-NEXT:    pxor %xmm1, %xmm0
89; CHECK-NEXT:    ret{{[l|q]}}
90entry:
91  %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
92  %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
93  %K = xor <8 x i16> %B, %C
94  ret <8 x i16> %K
95}
96
; shr8 - <8 x i16> logical shift-right by splat constants.
; Xors %A >> 2 with %A >> 1; both shifts are expected as psrlw with an
; immediate count.
97define <8 x i16> @shr8(<8 x i16> %A) nounwind {
98; CHECK-LABEL: shr8:
99; CHECK:       # %bb.0: # %entry
100; CHECK-NEXT:    movdqa %xmm0, %xmm1
101; CHECK-NEXT:    psrlw $2, %xmm1
102; CHECK-NEXT:    psrlw $1, %xmm0
103; CHECK-NEXT:    pxor %xmm1, %xmm0
104; CHECK-NEXT:    ret{{[l|q]}}
105entry:
106  %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
107  %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
108  %K = xor <8 x i16> %B, %C
109  ret <8 x i16> %K
110}
111
; sra8 - <8 x i16> arithmetic shift-right by splat constants.
; Xors the ashr-by-2 and ashr-by-1 results; both shifts are expected as psraw
; with an immediate count.
112define <8 x i16> @sra8(<8 x i16> %A) nounwind {
113; CHECK-LABEL: sra8:
114; CHECK:       # %bb.0: # %entry
115; CHECK-NEXT:    movdqa %xmm0, %xmm1
116; CHECK-NEXT:    psraw $2, %xmm1
117; CHECK-NEXT:    psraw $1, %xmm0
118; CHECK-NEXT:    pxor %xmm1, %xmm0
119; CHECK-NEXT:    ret{{[l|q]}}
120entry:
121  %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
122  %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
123  %K = xor <8 x i16> %B, %C
124  ret <8 x i16> %K
125}
126
127; Non-splat tests
128
; sll8_nosplat - <8 x i16> shift-left by per-lane (non-uniform) constants.
; Each shift is expected as a pmullw by a constant vector whose lanes are
; 1 << amount: amounts <1,2,3,6,2,2,2,2> give [2,4,8,64,4,4,4,4] and amounts
; <9,7,5,1,4,1,1,1> give [512,128,32,2,16,2,2,2].
; The i686 and x86_64 expectations differ only in how the second constant-pool
; operand is addressed (the x86_64 form carries a (%rip) base) and in the
; return mnemonic.
129define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
130; X86-LABEL: sll8_nosplat:
131; X86:       # %bb.0: # %entry
132; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
133; X86-NEXT:    pmullw %xmm0, %xmm1
134; X86-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [512,128,32,2,16,2,2,2]
135; X86-NEXT:    pxor %xmm1, %xmm0
136; X86-NEXT:    retl
137;
138; X64-LABEL: sll8_nosplat:
139; X64:       # %bb.0: # %entry
140; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
141; X64-NEXT:    pmullw %xmm0, %xmm1
142; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,128,32,2,16,2,2,2]
143; X64-NEXT:    pxor %xmm1, %xmm0
144; X64-NEXT:    retq
145entry:
146  %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
147  %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
148  %K = xor <8 x i16> %B, %C
149  ret <8 x i16> %K
150}
151
; shr2_nosplat - <2 x i64> logical shift-right by per-lane constants.
; %B shifts by <8,1> and %C by <1,0>. The expected assembly computes the two
; uniform shifts (psrlq by 8 and by 1) on copies of the input and then blends
; lanes: shufpd takes lane 0 of the shift-by-8 and lane 1 of the shift-by-1
; (giving %B), movsd takes lane 0 of the shift-by-1 and keeps lane 1 of the
; unshifted input (giving %C). The two are combined with xorpd.
152define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
153; CHECK-LABEL: shr2_nosplat:
154; CHECK:       # %bb.0: # %entry
155; CHECK-NEXT:    movdqa %xmm0, %xmm1
156; CHECK-NEXT:    psrlq $8, %xmm1
157; CHECK-NEXT:    movdqa %xmm0, %xmm2
158; CHECK-NEXT:    psrlq $1, %xmm2
159; CHECK-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
160; CHECK-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
161; CHECK-NEXT:    xorpd %xmm1, %xmm0
162; CHECK-NEXT:    ret{{[l|q]}}
163entry:
164  %B = lshr <2 x i64> %A,  < i64 8, i64 1>
165  %C = lshr <2 x i64> %A,  < i64 1, i64 0>
166  %K = xor <2 x i64> %B, %C
167  ret <2 x i64> %K
168}
169
170; Other shifts
171
; shl2_other - <2 x i32> shift-left by splat constants.
; Xors %A << 2 with %A << 9. The two-element vector is expected to be shifted
; in an xmm register with pslld, the same instruction form used by the
; <4 x i32> tests in this file.
172define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
173; CHECK-LABEL: shl2_other:
174; CHECK:       # %bb.0: # %entry
175; CHECK-NEXT:    movdqa %xmm0, %xmm1
176; CHECK-NEXT:    pslld $2, %xmm1
177; CHECK-NEXT:    pslld $9, %xmm0
178; CHECK-NEXT:    pxor %xmm1, %xmm0
179; CHECK-NEXT:    ret{{[l|q]}}
180entry:
181  %B = shl <2 x i32> %A,  < i32 2, i32 2>
182  %C = shl <2 x i32> %A,  < i32 9, i32 9>
183  %K = xor <2 x i32> %B, %C
184  ret <2 x i32> %K
185}
186
; shr2_other - <2 x i32> logical shift-right by splat constants.
; Xors %A >> 8 with %A >> 1. The two-element vector is expected to be shifted
; in an xmm register with psrld, the same instruction form used by the
; <4 x i32> tests in this file.
187define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
188; CHECK-LABEL: shr2_other:
189; CHECK:       # %bb.0: # %entry
190; CHECK-NEXT:    movdqa %xmm0, %xmm1
191; CHECK-NEXT:    psrld $8, %xmm1
192; CHECK-NEXT:    psrld $1, %xmm0
193; CHECK-NEXT:    pxor %xmm1, %xmm0
194; CHECK-NEXT:    ret{{[l|q]}}
195entry:
196  %B = lshr <2 x i32> %A,  < i32 8, i32 8>
197  %C = lshr <2 x i32> %A,  < i32 1, i32 1>
198  %K = xor <2 x i32> %B, %C
199  ret <2 x i32> %K
200}
201
; shl9 - <16 x i8> shift-left by a splat of 3.
; Expected as a 16-bit-lane shift (psllw by 3) followed by a pand with a
; constant-pool mask; the mask value itself is not shown in the expected
; output. Presumably the mask clears bits shifted in from the neighbouring
; byte - confirm against the generated constant pool.
; The i686 and x86_64 expectations differ only in the constant-pool addressing
; (the x86_64 form carries a (%rip) base) and the return mnemonic.
202define <16 x i8> @shl9(<16 x i8> %A) nounwind {
203; X86-LABEL: shl9:
204; X86:       # %bb.0:
205; X86-NEXT:    psllw $3, %xmm0
206; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
207; X86-NEXT:    retl
208;
209; X64-LABEL: shl9:
210; X64:       # %bb.0:
211; X64-NEXT:    psllw $3, %xmm0
212; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
213; X64-NEXT:    retq
214  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
215  ret <16 x i8> %B
216}
217
; shr9 - <16 x i8> logical shift-right by a splat of 3.
; Expected as a 16-bit-lane shift (psrlw by 3) followed by a pand with a
; constant-pool mask; the mask value itself is not shown in the expected
; output. Presumably the mask clears bits shifted in from the neighbouring
; byte - confirm against the generated constant pool.
; The i686 and x86_64 expectations differ only in the constant-pool addressing
; (the x86_64 form carries a (%rip) base) and the return mnemonic.
218define <16 x i8> @shr9(<16 x i8> %A) nounwind {
219; X86-LABEL: shr9:
220; X86:       # %bb.0:
221; X86-NEXT:    psrlw $3, %xmm0
222; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
223; X86-NEXT:    retl
224;
225; X64-LABEL: shr9:
226; X64:       # %bb.0:
227; X64-NEXT:    psrlw $3, %xmm0
228; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
229; X64-NEXT:    retq
230  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
231  ret <16 x i8> %B
232}
233
; sra_v16i8_7 - <16 x i8> arithmetic shift-right by a splat of 7.
; No shift instruction is expected: the output zeroes xmm1, does a signed
; byte compare of that zero against the input (pcmpgtb, i.e. 0 > %A per
; lane), and copies the compare result into xmm0 as the return value.
234define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
235; CHECK-LABEL: sra_v16i8_7:
236; CHECK:       # %bb.0:
237; CHECK-NEXT:    pxor %xmm1, %xmm1
238; CHECK-NEXT:    pcmpgtb %xmm0, %xmm1
239; CHECK-NEXT:    movdqa %xmm1, %xmm0
240; CHECK-NEXT:    ret{{[l|q]}}
241  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
242  ret <16 x i8> %B
243}
244
; sra_v16i8 - <16 x i8> arithmetic shift-right by a splat of 3.
; Expected sequence: a 16-bit-lane logical shift (psrlw by 3) and a pand with
; a constant-pool mask, then an xor with a splat of 16 followed by a byte
; subtract of the same splat. 16 is 1 << (7 - 3), the position of each byte's
; original sign bit after the shift, so the xor/subtract pair looks like the
; usual sign-extension step - confirm against the lowering code.
; The i686 and x86_64 expectations differ only in the constant-pool addressing
; (the x86_64 form carries a (%rip) base) and the return mnemonic.
245define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
246; X86-LABEL: sra_v16i8:
247; X86:       # %bb.0:
248; X86-NEXT:    psrlw $3, %xmm0
249; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
250; X86-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
251; X86-NEXT:    pxor %xmm1, %xmm0
252; X86-NEXT:    psubb %xmm1, %xmm0
253; X86-NEXT:    retl
254;
255; X64-LABEL: sra_v16i8:
256; X64:       # %bb.0:
257; X64-NEXT:    psrlw $3, %xmm0
258; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
259; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
260; X64-NEXT:    pxor %xmm1, %xmm0
261; X64-NEXT:    psubb %xmm1, %xmm0
262; X64-NEXT:    retq
263  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
264  ret <16 x i8> %B
265}
266