; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx    | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=CHECK,AVX2

;
; testz(~X,Y) -> testc(X,Y)
;
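; (v)ptest sets ZF when (DST & SRC) == 0 and CF when (~DST & SRC) == 0; the
; ptestz/ptestc/ptestnzc intrinsics return ZF, CF and (!ZF && !CF) respectively.
; (AT&T operand order below: `vptest %ymm1, %ymm0` has DST = %ymm0, SRC = %ymm1.)
; So testz(~X,Y) computes ((~X & Y) == 0), which is exactly CF of ptest(X,Y):
; the NOT folds away and the cmov condition switches from a ZF read to a CF
; read (cmovael, ae = CF clear).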

define i32 @ptestz_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(X,~Y) -> testc(Y,X)
;
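; testz(X,~Y) computes ((X & ~Y) == 0) == ((~Y & X) == 0), i.e. CF of
; ptest(Y,X), hence the swapped vptest operands below.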

define i32 @ptestz_256_invert1(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_invert1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm1
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %d, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(~X,Y) -> testz(X,Y)
;
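; testc(~X,Y) computes ((~~X & Y) == 0) == ((X & Y) == 0), i.e. ZF of
; ptest(X,Y), so the CF read becomes a ZF read (cmovnel, ne = ZF clear).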

define i32 @ptestc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testnzc(~X,Y) -> testnzc(X,Y)
;
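; testnzc returns !ZF && !CF, and complementing the first operand merely swaps
; the roles of ZF and CF, so the NOT can be dropped without changing the result.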

define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestnzc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovbel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestnzc_256_invert0_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmoval %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp eq i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(X,~X) -> testc(X,-1)
;
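; testc(X,~X) computes ((~X & ~X) == 0) == (X == -1), the same predicate as
; testc(X,-1), so the variable NOT is folded to an all-ones constant
; (materialized via vcmptrueps on AVX1 and vpcmpeqd on AVX2 below).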

define i32 @ptestc_256_not(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; AVX1-LABEL: ptestc_256_not:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vptest %ymm1, %ymm0
; AVX1-NEXT:    cmovael %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestc_256_not:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    cmovael %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %c, <4 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(X,Y),AND(X,Y)) -> testz(X,Y)
;
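; ZF of ptest(X&Y, X&Y) is ((X & Y) == 0), which ptest(X,Y) computes directly,
; so the explicit AND is redundant.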

define i32 @ptestz_256_and(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = and <4 x i64> %c, %d
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %t1)
  %t3 = icmp eq i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(~X,Y),AND(~X,Y)) -> testc(X,Y)
;
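; ((~X & Y) == 0) is CF of ptest(X,Y), so the ANDNOT-with-itself pattern folds
; to a plain vptest plus a carry-flag read (cmovbl, b = CF set).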

define i32 @ptestz_256_andc(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_andc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovbl %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = and <4 x i64> %t1, %d
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> %t2)
  %t4 = icmp eq i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(-1,X) -> testz(X,X)
;
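; ((-1 & X) == 0) == (X == 0) == ((X & X) == 0), so the all-ones operand is
; replaced by X and no constant needs to be materialized.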

define i32 @ptestz_256_allones0(<4 x i64> %c, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_allones0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %c)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

;
; testz(X,-1) -> testz(X,X)
;
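; Likewise with all-ones as the second operand: ((X & -1) == 0) == ((X & X) == 0).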

define i32 @ptestz_256_allones1(<4 x i64> %c, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_allones1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

;
; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
;
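; After an arithmetic shift by the element width minus one, every lane is either
; all-zeros or all-ones, so only the original sign bits matter: the test can use
; vtestps (v8i32) or vpmovmskb+testl (v32i8) on the unshifted value. On AVX1 the
; 256-bit shift has to be split across two 128-bit halves and, as the CHECK
; lines show, the fold is not applied there.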

define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) {
; AVX1-LABEL: ptestz_v8i32_signbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovnel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestz_v8i32_signbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vtestps %ymm0, %ymm0
; AVX2-NEXT:    cmovnel %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = ashr <8 x i32> %c, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %t2 = bitcast <8 x i32> %t1 to <4 x i64>
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
; AVX1-LABEL: ptestz_v32i8_signbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovnel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestz_v32i8_signbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    testl %ecx, %ecx
; AVX2-NEXT:    cmovnel %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = ashr <32 x i8> %c, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t2 = bitcast <32 x i8> %t1 to <4 x i64>
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(or(extract_lo(X),extract_hi(X)),or(extract_lo(Y),extract_hi(Y))) -> testz(X,Y)
;
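; The sequence below tests "all elements equal": sext(icmp eq) yields all-ones
; lanes where elements match, and testc(M,-1) computes ((~M & -1) == 0),
; i.e. M == -1.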

; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
define i1 @PR38788(<16 x i16> %0, <16 x i16> %1) {
; AVX1-LABEL: PR38788:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vptest %ymm1, %ymm0
; AVX1-NEXT:    setae %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR38788:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setae %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %3 = icmp eq <16 x i16> %0, %1
  %4 = sext <16 x i1> %3 to <16 x i16>
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  %6 = tail call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %5, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %7 = icmp eq i32 %6, 0
  ret i1 %7
}

declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>)
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>)
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>)