xref: /llvm-project/llvm/test/CodeGen/X86/undo-mul-and.ll (revision 6c3bf364bf95209925b1e884077ec79cca274dc7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=CHECK,CHECK-SSE
3; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
4; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512
5
;; Base case of the "undo mul-and" fold: the multiplier 56 = (1 << 6) - (1 << 3),
;; and the mask 56 only keeps bits below bit 6, so X*56 is equivalent to (-X) << 3
;; under the mask. The generated code below uses negl + leal(,reg,8) + andl
;; instead of an imull.
6define i32 @mul_and_to_neg_shl_and(i32 %x) {
7; CHECK-LABEL: mul_and_to_neg_shl_and:
8; CHECK:       # %bb.0:
9; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
10; CHECK-NEXT:    negl %edi
11; CHECK-NEXT:    leal (,%rdi,8), %eax
12; CHECK-NEXT:    andl $56, %eax
13; CHECK-NEXT:    retq
14  %mul = mul i32 %x, 56
15  %and = and i32 %mul, 56
16  ret i32 %and
17}
18
;; Same fold with a non-round mask: 51 = 0b110011, but (-X) << 3 has its low
;; three bits clear, so the effective mask becomes 48 (see the andl $48 below).
19define i32 @mul_and_to_neg_shl_and2(i32 %x) {
20; CHECK-LABEL: mul_and_to_neg_shl_and2:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
23; CHECK-NEXT:    negl %edi
24; CHECK-NEXT:    leal (,%rdi,8), %eax
25; CHECK-NEXT:    andl $48, %eax
26; CHECK-NEXT:    retq
27  %mul = mul i32 %x, 56
28  %and = and i32 %mul, 51
29  ret i32 %and
30}
31
;; Splat-vector version of the fold: the vector multiply is replaced by a
;; subtract-from-zero (negate), a pslld $3, and a mask, on all three targets.
32define <4 x i32> @mul_and_to_neg_shl_and_vec(<4 x i32> %x) {
33; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec:
34; CHECK-SSE:       # %bb.0:
35; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
36; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
37; CHECK-SSE-NEXT:    pslld $3, %xmm1
38; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
39; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
40; CHECK-SSE-NEXT:    retq
41;
42; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec:
43; CHECK-AVX1:       # %bb.0:
44; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
45; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
46; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
47; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
48; CHECK-AVX1-NEXT:    retq
49;
50; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec:
51; CHECK-AVX512:       # %bb.0:
52; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
53; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
54; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
55; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
56; CHECK-AVX512-NEXT:    retq
57  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
58  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
59  ret <4 x i32> %and
60}
61
;; Negative test: the multiplier vector is not a splat (last lane is 64), so the
;; fold must not fire and a real vector multiply is emitted on every target.
62define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
63; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
64; CHECK-SSE:       # %bb.0:
65; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
66; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
67; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
68; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
69; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
70; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
71; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
72; CHECK-SSE-NEXT:    retq
73;
74; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
75; CHECK-AVX1:       # %bb.0:
76; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
77; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
78; CHECK-AVX1-NEXT:    retq
79;
80; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
81; CHECK-AVX512:       # %bb.0:
82; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
83; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
84; CHECK-AVX512-NEXT:    retq
85  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 64>
86  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
87  ret <4 x i32> %and
88}
89
90;; The todo_no_splat tests satisfy the fold's invariants in every element, so
90;; the transform could legally apply; it is just not yet implemented for
90;; non-splat constant vectors.
;; TODO case: non-splat multiplier <56,56,56,48>, but each lane individually
;; satisfies the fold's requirements. Current codegen still emits a multiply
;; (checked below); per-lane folding is not implemented.
91define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
92; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
93; CHECK-SSE:       # %bb.0:
94; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
95; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
96; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
97; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
98; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
99; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
100; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
101; CHECK-SSE-NEXT:    retq
102;
103; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
104; CHECK-AVX1:       # %bb.0:
105; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
106; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
107; CHECK-AVX1-NEXT:    retq
108;
109; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
110; CHECK-AVX512:       # %bb.0:
111; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
112; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
113; CHECK-AVX512-NEXT:    retq
114  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 48>
115  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
116  ret <4 x i32> %and
117}
118
;; TODO case: splat multiplier but non-splat mask <48,48,48,32>; every lane
;; still meets the fold's conditions, yet a vector multiply is emitted.
119define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat2(<4 x i32> %x) {
120; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
121; CHECK-SSE:       # %bb.0:
122; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [56,56,56,56]
123; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
124; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm0
125; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
126; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm2
127; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
128; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
129; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
130; CHECK-SSE-NEXT:    retq
131;
132; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
133; CHECK-AVX1:       # %bb.0:
134; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
135; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
136; CHECK-AVX1-NEXT:    retq
137;
138; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
139; CHECK-AVX512:       # %bb.0:
140; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
141; CHECK-AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
142; CHECK-AVX512-NEXT:    retq
143  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
144  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 32>
145  ret <4 x i32> %and
146}
147
;; An undef lane in the multiplier splat does not block the fold: same
;; negate + shift + mask sequence as the fully-splat vector test.
148define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_mul(<4 x i32> %x) {
149; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
150; CHECK-SSE:       # %bb.0:
151; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
152; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
153; CHECK-SSE-NEXT:    pslld $3, %xmm1
154; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
155; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
156; CHECK-SSE-NEXT:    retq
157;
158; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
159; CHECK-AVX1:       # %bb.0:
160; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
161; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
162; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
163; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
164; CHECK-AVX1-NEXT:    retq
165;
166; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
167; CHECK-AVX512:       # %bb.0:
168; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
169; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
170; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
171; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
172; CHECK-AVX512-NEXT:    retq
173  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 undef>
174  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
175  ret <4 x i32> %and
176}
177
;; An undef lane in the mask splat does not block the fold either: codegen
;; matches the fully-splat vector test.
178define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_and(<4 x i32> %x) {
179; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
180; CHECK-SSE:       # %bb.0:
181; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
182; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
183; CHECK-SSE-NEXT:    pslld $3, %xmm1
184; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
185; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
186; CHECK-SSE-NEXT:    retq
187;
188; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
189; CHECK-AVX1:       # %bb.0:
190; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
191; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
192; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
193; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
194; CHECK-AVX1-NEXT:    retq
195;
196; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
197; CHECK-AVX512:       # %bb.0:
198; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
199; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
200; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
201; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
202; CHECK-AVX512-NEXT:    retq
203  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
204  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 undef>
205  ret <4 x i32> %and
206}
207
;; i8 element type with undef lanes in BOTH constants: multiplier 12 =
;; (1 << 4) - (1 << 2), so the fold produces psubb-from-zero + psllw $2 + pand.
208define <16 x i8> @mul_and_to_neg_shl_and_vec_with_undef_mul_and(<16 x i8> %x) {
209; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
210; CHECK-SSE:       # %bb.0:
211; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
212; CHECK-SSE-NEXT:    psubb %xmm0, %xmm1
213; CHECK-SSE-NEXT:    psllw $2, %xmm1
214; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
215; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
216; CHECK-SSE-NEXT:    retq
217;
218; CHECK-AVX-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
219; CHECK-AVX:       # %bb.0:
220; CHECK-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
221; CHECK-AVX-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
222; CHECK-AVX-NEXT:    vpsllw $2, %xmm0, %xmm0
223; CHECK-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
224; CHECK-AVX-NEXT:    retq
225  %mul = mul <16 x i8> %x, <i8 12, i8 12, i8 12, i8 12, i8 undef, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>
226  %and = and <16 x i8> %mul, <i8 11, i8 undef, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
227  ret <16 x i8> %and
228}
229
;; Negative test: 57 is not of the form (1 << a) - (1 << b), so the fold does
;; not apply and a plain imull $57 is kept.
230define i32 @mul_and_to_neg_shl_and_fail_invalid_mul(i32 %x) {
231; CHECK-LABEL: mul_and_to_neg_shl_and_fail_invalid_mul:
232; CHECK:       # %bb.0:
233; CHECK-NEXT:    imull $57, %edi, %eax
234; CHECK-NEXT:    andl $56, %eax
235; CHECK-NEXT:    retq
236  %mul = mul i32 %x, 57
237  %and = and i32 %mul, 56
238  ret i32 %and
239}
240
;; Negative test: a power-of-two multiplier (64) already lowers to a single
;; shll, so there is nothing for the neg+shl fold to improve.
241define i32 @mul_and_to_neg_shl_and_fail_mul_p2(i32 %x) {
242; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mul_p2:
243; CHECK:       # %bb.0:
244; CHECK-NEXT:    movl %edi, %eax
245; CHECK-NEXT:    shll $6, %eax
246; CHECK-NEXT:    andl $64, %eax
247; CHECK-NEXT:    retq
248  %mul = mul i32 %x, 64
249  %and = and i32 %mul, 64
250  ret i32 %and
251}
252
;; Negative test: mask 120 (0b1111000) keeps bit 6, where X*56 and (-X) << 3
;; can differ, so the fold would be incorrect and imull $56 is kept.
;; (NOTE(review): function name has a typo, "to" should be "too" — kept as-is
;; because the autogenerated CHECK-LABEL depends on it.)
253define i32 @mul_and_to_neg_shl_and_fail_mask_to_large(i32 %x) {
254; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mask_to_large:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    imull $56, %edi, %eax
257; CHECK-NEXT:    andl $120, %eax
258; CHECK-NEXT:    retq
259  %mul = mul i32 %x, 56
260  %and = and i32 %mul, 120
261  ret i32 %and
262}
263