; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX

;
; testz(~X,Y) -> testc(X,Y)
;
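; ptestz(A,B) returns ZF = ((A & B) == 0) and ptestc(A,B) returns CF = ((~A & B) == 0),
; so ptestz(~X,Y) == ((~X & Y) == 0) == ptestc(X,Y) and the explicit NOT can be dropped.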

define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_invert0:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovael %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_invert0:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(X,~Y) -> testc(Y,X)
;
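; ptestz(X,~Y) == ((X & ~Y) == 0) == ((~Y & X) == 0) == ptestc(Y,X): swap the operands and drop the NOT.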

define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_invert1:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    cmovael %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_invert1:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %d, <i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(~X,Y) -> testz(X,Y)
;
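; ptestc(~X,Y) == ((~(~X) & Y) == 0) == ((X & Y) == 0) == ptestz(X,Y): drop the NOT and check ZF instead of CF.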

define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestc_128_invert0:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestc_128_invert0:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testnzc(~X,Y) -> testnzc(X,Y)
;
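; ptestnzc(A,B) is 1 iff ZF == 0 and CF == 0, i.e. (A & B) != 0 and (~A & B) != 0;
; substituting ~X for X just swaps the two conditions, so the NOT can be dropped.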

define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestnzc_128_invert0:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestnzc_128_invert0:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(X,~X) -> testc(X,-1)
;
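; ptestc(X,~X) == ((~X & ~X) == 0) == (~X == 0) == ((~X & -1) == 0) == ptestc(X,-1), so the NOT
; of %c is replaced by a cheap all-ones constant (pcmpeqd).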

define i32 @ptestc_128_not(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestc_128_not:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovael %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestc_128_not:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %c, <2 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(X,Y),AND(X,Y)) -> testz(X,Y)
;
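; ptestz(X & Y, X & Y) == (((X & Y) & (X & Y)) == 0) == ((X & Y) == 0) == ptestz(X,Y), so the
; separate AND instruction is folded into the ptest.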

define i32 @ptestz_128_and(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_and:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_and:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = and <2 x i64> %c, %d
  %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(~X,Y),AND(~X,Y)) -> testc(X,Y)
;
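; ptestz(~X & Y, ~X & Y) == ((~X & Y) == 0) == ptestc(X,Y), so both the ANDNOT and the ZF check
; are replaced by a single CF check.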

define i32 @ptestz_128_andc(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_andc:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    cmovael %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_andc:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
  %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
  %t2 = and <2 x i64> %t1, %d
  %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(-1,X) -> testz(X,X)
;
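; ptestz(-1,X) == ((-1 & X) == 0) == (X == 0) == ((X & X) == 0) == ptestz(X,X), avoiding the
; all-ones constant.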

define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_allones0:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm0, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_allones0:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> <i64 -1, i64 -1>, <2 x i64> %c)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

;
; testz(X,-1) -> testz(X,X)
;
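; Same fold with the operands swapped: ptestz(X,-1) == ((X & -1) == 0) == ptestz(X,X).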

define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_128_allones1:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    ptest %xmm0, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_128_allones1:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> <i64 -1, i64 -1>)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

define zeroext i1 @PR38522(ptr %x, ptr %y) {
; SSE-LABEL: PR38522:
; SSE:       # %bb.0: # %start
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    pcmpgtb (%rsi), %xmm0
; SSE-NEXT:    ptest %xmm0, %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: PR38522:
; AVX:       # %bb.0: # %start
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
start:
  %0 = load <16 x i8>, ptr %x, align 16
  %1 = load <16 x i8>, ptr %y, align 16
  %2 = icmp sle <16 x i8> %0, %1
  %3 = sext <16 x i1> %2 to <16 x i8>
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  %5 = tail call i32 @llvm.x86.sse41.ptestc(<2 x i64> %4, <2 x i64> <i64 -1, i64 -1>)
  %6 = icmp eq i32 %5, 1
  ret i1 %6
}

;
; testz(ashr(X,bw-1),-1) -> testpd/testps/movmskpd/movmskps/pmovmskb(X)
;
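; ashr(X, bw-1) replicates each element's sign bit across the element, so ptestz(ashr,-1) is
; 1 iff every sign bit is clear; that is a plain sign-mask test, answerable via
; movmskps/movmskpd/pmovmskb (SSE) or vtestps/vtestpd (AVX) without the shift.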

define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
; SSE41-LABEL: ptestz_v2i64_signbits:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    movmskps %xmm0, %ecx
; SSE41-NEXT:    testl %ecx, %ecx
; SSE41-NEXT:    cmovnel %esi, %eax
; SSE41-NEXT:    retq
;
; SSE42-LABEL: ptestz_v2i64_signbits:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movl %edi, %eax
; SSE42-NEXT:    movmskpd %xmm0, %ecx
; SSE42-NEXT:    testl %ecx, %ecx
; SSE42-NEXT:    cmovnel %esi, %eax
; SSE42-NEXT:    retq
;
; AVX-LABEL: ptestz_v2i64_signbits:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vtestpd %xmm0, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = ashr <2 x i64> %c, <i64 63, i64 63>
  %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> <i64 -1, i64 -1>)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

define i32 @ptestz_v4i32_signbits(<4 x i32> %c, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_v4i32_signbits:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    movmskps %xmm0, %ecx
; SSE-NEXT:    testl %ecx, %ecx
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_v4i32_signbits:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vtestps %xmm0, %xmm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %t2 = bitcast <4 x i32> %t1 to <2 x i64>
  %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_v8i16_signbits:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    pmovmskb %xmm0, %ecx
; SSE-NEXT:    testl $43690, %ecx # imm = 0xAAAA
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_v8i16_signbits:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpmovmskb %xmm0, %ecx
; AVX-NEXT:    testl $43690, %ecx # imm = 0xAAAA
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
  %t1 = ashr <8 x i16> %c, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %t2 = bitcast <8 x i16> %t1 to <2 x i64>
  %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(or(extract_lo(X),extract_hi(X)),or(extract_lo(Y),extract_hi(Y))) -> testz(X,Y)
;
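; Without AVX the two 128-bit halves are OR'd and fed to a 128-bit ptest; with AVX the
; split-and-OR pattern below is recognised and a single 256-bit vptest is emitted instead.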

define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; SSE-LABEL: ptestz_v2i64_concat:
; SSE:       # %bb.0:
; SSE-NEXT:    movl %edi, %eax
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    ptest %xmm2, %xmm0
; SSE-NEXT:    cmovnel %esi, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestz_v2i64_concat:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %ymm1, %ymm0
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %t4 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %t5 = or <2 x i64> %t1, %t2
  %t6 = or <2 x i64> %t4, %t3
  %t7 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t5, <2 x i64> %t6)
  %t8 = icmp ne i32 %t7, 0
  %t9 = select i1 %t8, i32 %a, i32 %b
  ret i32 %t9
}

; PR123456 - all_of(x == 0)
define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
; SSE-LABEL: ptestc_v4i32_eq0:
; SSE:       # %bb.0:
; SSE-NEXT:    ptest %xmm0, %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestc_v4i32_eq0:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %icmp = icmp eq <4 x i32> %a0, zeroinitializer
  %sext = sext <4 x i1> %icmp to <4 x i32>
  %bc = bitcast <4 x i32> %sext to <2 x i64>
  %test = tail call noundef i32 @llvm.x86.sse41.ptestc(<2 x i64> %bc, <2 x i64> splat (i64 -1))
  %res = icmp ne i32 %test, 0
  ret i1 %res
}

; PR123456 - all_of((a & b) == 0)
define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_and_eq0:
; SSE:       # %bb.0:
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestc_v4i32_and_eq0:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm1
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %and = and <4 x i32> %a1, %a0
  %icmp = icmp eq <4 x i32> %and, zeroinitializer
  %sext = sext <4 x i1> %icmp to <4 x i32>
  %bc = bitcast <4 x i32> %sext to <2 x i64>
  %test = tail call noundef i32 @llvm.x86.sse41.ptestc(<2 x i64> %bc, <2 x i64> splat (i64 -1))
  %res = icmp ne i32 %test, 0
  ret i1 %res
}

; PR123456 - !all_of((a & ~b) == 0)
define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_andnot_eq0:
; SSE:       # %bb.0:
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    setae %al
; SSE-NEXT:    retq
;
; AVX-LABEL: ptestc_v4i32_andnot_eq0:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm1
; AVX-NEXT:    setae %al
; AVX-NEXT:    retq
  %not = xor <4 x i32> %a1, splat (i32 -1)
  %and = and <4 x i32> %a0, %not
  %icmp = icmp eq <4 x i32> %and, zeroinitializer
  %sext = sext <4 x i1> %icmp to <4 x i32>
  %bc = bitcast <4 x i32> %sext to <2 x i64>
  %test = tail call noundef i32 @llvm.x86.sse41.ptestc(<2 x i64> %bc, <2 x i64> splat (i64 -1))
  %res = icmp eq i32 %test, 0
  ret i1 %res
}

; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
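; all_of(%0 == %1) is equivalent to ((%0 ^ %1) == 0), so a single ZF-style ptest of the XOR
; against itself could answer it without materializing the compare mask and all-ones constant.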
define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) {
; SSE-LABEL: PR38788:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    setb %al
; SSE-NEXT:    retq
;
; AVX-LABEL: PR38788:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
  %3 = icmp eq <4 x i32> %0, %1
  %4 = sext <4 x i1> %3 to <4 x i32>
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  %6 = tail call i32 @llvm.x86.sse41.ptestc(<2 x i64> %5, <2 x i64> <i64 -1, i64 -1>)
  %7 = icmp eq i32 %6, 1
  ret i1 %7
}

define i32 @PR88958_1(ptr %0, <2 x i64> %1) {
; SSE-LABEL: PR88958_1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest (%rdi), %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: PR88958_1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest (%rdi), %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = tail call i32 @llvm.x86.sse41.ptestz(<2 x i64> %3, <2 x i64> %1)
  ret i32 %4
}

define i32 @PR88958_2(ptr %0, <2 x i64> %1) {
; SSE-LABEL: PR88958_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm1
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    setb %al
; SSE-NEXT:    retq
;
; AVX-LABEL: PR88958_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa (%rdi), %xmm1
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm0, %xmm1
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = tail call i32 @llvm.x86.sse41.ptestc(<2 x i64> %3, <2 x i64> %1)
  ret i32 %4
}

declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone