xref: /llvm-project/llvm/test/CodeGen/X86/combine-testps.ll (revision 254cdcddd65efe3c045d49853239932ab07121d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
4
5;
6; testz(~X,Y) -> testc(X,Y)
7;
8
; 128-bit testz(~X,Y) -> testc(X,Y): the vector NOT (xor with all-ones) must be
; folded away entirely; the select then consumes CF (cmovael) instead of ZF,
; with no inversion instructions emitted.
9define i32 @testpsz_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
10; CHECK-LABEL: testpsz_128_invert0:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    movl %edi, %eax
13; CHECK-NEXT:    vtestps %xmm1, %xmm0
14; CHECK-NEXT:    cmovael %esi, %eax
15; CHECK-NEXT:    retq
16  %t0 = bitcast <4 x float> %c to <2 x i64>
17  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
18  %t2 = bitcast <2 x i64> %t1 to <4 x float>
19  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
20  %t4 = icmp ne i32 %t3, 0
21  %t5 = select i1 %t4, i32 %a, i32 %b
22  ret i32 %t5
23}
24
; 256-bit variant of testz(~X,Y) -> testc(X,Y); also checks vzeroupper is
; emitted before returning from YMM code.
25define i32 @testpsz_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
26; CHECK-LABEL: testpsz_256_invert0:
27; CHECK:       # %bb.0:
28; CHECK-NEXT:    movl %edi, %eax
29; CHECK-NEXT:    vtestps %ymm1, %ymm0
30; CHECK-NEXT:    cmovael %esi, %eax
31; CHECK-NEXT:    vzeroupper
32; CHECK-NEXT:    retq
33  %t0 = bitcast <8 x float> %c to <4 x i64>
34  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
35  %t2 = bitcast <4 x i64> %t1 to <8 x float>
36  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %d)
37  %t4 = icmp ne i32 %t3, 0
38  %t5 = select i1 %t4, i32 %a, i32 %b
39  ret i32 %t5
40}
41
42;
43; testz(X,~Y) -> testc(Y,X)
44;
45
; 128-bit testz(X,~Y) -> testc(Y,X): the NOT on the second operand is folded
; away and the vtestps operands are commuted (note the swapped xmm0/xmm1 order
; versus the invert0 test above).
46define i32 @testpsz_128_invert1(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
47; CHECK-LABEL: testpsz_128_invert1:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    movl %edi, %eax
50; CHECK-NEXT:    vtestps %xmm0, %xmm1
51; CHECK-NEXT:    cmovael %esi, %eax
52; CHECK-NEXT:    retq
53  %t0 = bitcast <4 x float> %d to <2 x i64>
54  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
55  %t2 = bitcast <2 x i64> %t1 to <4 x float>
56  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %c, <4 x float> %t2)
57  %t4 = icmp ne i32 %t3, 0
58  %t5 = select i1 %t4, i32 %a, i32 %b
59  ret i32 %t5
60}
61
; 256-bit variant of testz(X,~Y) -> testc(Y,X) with commuted vtestps operands.
62define i32 @testpsz_256_invert1(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
63; CHECK-LABEL: testpsz_256_invert1:
64; CHECK:       # %bb.0:
65; CHECK-NEXT:    movl %edi, %eax
66; CHECK-NEXT:    vtestps %ymm0, %ymm1
67; CHECK-NEXT:    cmovael %esi, %eax
68; CHECK-NEXT:    vzeroupper
69; CHECK-NEXT:    retq
70  %t0 = bitcast <8 x float> %d to <4 x i64>
71  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
72  %t2 = bitcast <4 x i64> %t1 to <8 x float>
73  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %c, <8 x float> %t2)
74  %t4 = icmp ne i32 %t3, 0
75  %t5 = select i1 %t4, i32 %a, i32 %b
76  ret i32 %t5
77}
78
79;
80; testc(~X,Y) -> testz(X,Y)
81;
82
; 128-bit testc(~X,Y) -> testz(X,Y): the NOT folds away and the consumed flag
; switches from CF to ZF, hence cmovnel instead of a CF-based cmov.
83define i32 @testpsc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
84; CHECK-LABEL: testpsc_128_invert0:
85; CHECK:       # %bb.0:
86; CHECK-NEXT:    movl %edi, %eax
87; CHECK-NEXT:    vtestps %xmm1, %xmm0
88; CHECK-NEXT:    cmovnel %esi, %eax
89; CHECK-NEXT:    retq
90  %t0 = bitcast <4 x float> %c to <2 x i64>
91  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
92  %t2 = bitcast <2 x i64> %t1 to <4 x float>
93  %t3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %t2, <4 x float> %d)
94  %t4 = icmp ne i32 %t3, 0
95  %t5 = select i1 %t4, i32 %a, i32 %b
96  ret i32 %t5
97}
98
; 256-bit variant of testc(~X,Y) -> testz(X,Y).
99define i32 @testpsc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
100; CHECK-LABEL: testpsc_256_invert0:
101; CHECK:       # %bb.0:
102; CHECK-NEXT:    movl %edi, %eax
103; CHECK-NEXT:    vtestps %ymm1, %ymm0
104; CHECK-NEXT:    cmovnel %esi, %eax
105; CHECK-NEXT:    vzeroupper
106; CHECK-NEXT:    retq
107  %t0 = bitcast <8 x float> %c to <4 x i64>
108  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
109  %t2 = bitcast <4 x i64> %t1 to <8 x float>
110  %t3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %t2, <8 x float> %d)
111  %t4 = icmp ne i32 %t3, 0
112  %t5 = select i1 %t4, i32 %a, i32 %b
113  ret i32 %t5
114}
115
116;
117; testnzc(~X,Y) -> testnzc(X,Y)
118;
119
; 128-bit testnzc(~X,Y) -> testnzc(X,Y): testnzc is invariant under inverting
; one operand, so only the NOT is removed; the select still lowers to the
; above-condition cmov pair (cmovbel here for the inverted select).
120define i32 @testpsnzc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
121; CHECK-LABEL: testpsnzc_128_invert0:
122; CHECK:       # %bb.0:
123; CHECK-NEXT:    movl %edi, %eax
124; CHECK-NEXT:    vtestps %xmm1, %xmm0
125; CHECK-NEXT:    cmovbel %esi, %eax
126; CHECK-NEXT:    retq
127  %t0 = bitcast <4 x float> %c to <2 x i64>
128  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
129  %t2 = bitcast <2 x i64> %t1 to <4 x float>
130  %t3 = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %t2, <4 x float> %d)
131  %t4 = icmp ne i32 %t3, 0
132  %t5 = select i1 %t4, i32 %a, i32 %b
133  ret i32 %t5
134}
135
; 256-bit variant of testnzc(~X,Y) -> testnzc(X,Y).
136define i32 @testpsnzc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
137; CHECK-LABEL: testpsnzc_256_invert0:
138; CHECK:       # %bb.0:
139; CHECK-NEXT:    movl %edi, %eax
140; CHECK-NEXT:    vtestps %ymm1, %ymm0
141; CHECK-NEXT:    cmovbel %esi, %eax
142; CHECK-NEXT:    vzeroupper
143; CHECK-NEXT:    retq
144  %t0 = bitcast <8 x float> %c to <4 x i64>
145  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
146  %t2 = bitcast <4 x i64> %t1 to <8 x float>
147  %t3 = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %t2, <8 x float> %d)
148  %t4 = icmp ne i32 %t3, 0
149  %t5 = select i1 %t4, i32 %a, i32 %b
150  ret i32 %t5
151}
152
153;
154; SimplifyDemandedBits - only the sign bit is required
155;
156
; SimplifyDemandedBits: vtestps only inspects the per-lane sign bits, so the
; ashr-by-31 sign-splat is redundant and must be eliminated — the expected
; output tests the original %c directly with no shift instruction.
157define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
158; CHECK-LABEL: testpsz_128_signbit:
159; CHECK:       # %bb.0:
160; CHECK-NEXT:    movl %edi, %eax
161; CHECK-NEXT:    vtestps %xmm1, %xmm0
162; CHECK-NEXT:    cmovnel %esi, %eax
163; CHECK-NEXT:    retq
164  %t0 = bitcast <4 x float> %c to <4 x i32>
165  %t1 = ashr <4 x i32> %t0, <i32 31, i32 31, i32 31, i32 31>
166  %t2 = bitcast <4 x i32> %t1 to <4 x float>
167  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
168  %t4 = icmp ne i32 %t3, 0
169  %t5 = select i1 %t4, i32 %a, i32 %b
170  ret i32 %t5
171}
172
; 256-bit sign-bit case: the (icmp sgt 0, x) + sext sequence builds the same
; per-lane sign mask as an ashr-by-31; the combine must fold it away and feed
; %c straight into vtestps. (Despite the "nzc" in the test name, the intrinsic
; exercised here is vtestz.ps.256 — presumably intentional; the name is just a
; label.)
173define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
174; CHECK-LABEL: testpsnzc_256_signbit:
175; CHECK:       # %bb.0:
176; CHECK-NEXT:    movl %edi, %eax
177; CHECK-NEXT:    vtestps %ymm1, %ymm0
178; CHECK-NEXT:    cmovnel %esi, %eax
179; CHECK-NEXT:    vzeroupper
180; CHECK-NEXT:    retq
181  %t0 = bitcast <8 x float> %c to <8 x i32>
182  %t1 = icmp sgt <8 x i32> zeroinitializer, %t0
183  %t2 = sext <8 x i1> %t1 to <8 x i32>
184  %t3 = bitcast <8 x i32> %t2 to <8 x float>
185  %t4 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t3, <8 x float> %d)
186  %t5 = icmp ne i32 %t4, 0
187  %t6 = select i1 %t5, i32 %a, i32 %b
188  ret i32 %t6
189}
190
; Sign-splat feeding BOTH vtestps operands: even with the multi-use ashr, the
; shift must still be removed, leaving vtestps %ymm0, %ymm0 on the original
; vector.
191define i32 @testpsc_256_signbit_multiuse(<8 x float> %c, i32 %a, i32 %b) {
192; CHECK-LABEL: testpsc_256_signbit_multiuse:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    movl %edi, %eax
195; CHECK-NEXT:    vtestps %ymm0, %ymm0
196; CHECK-NEXT:    cmovnel %esi, %eax
197; CHECK-NEXT:    vzeroupper
198; CHECK-NEXT:    retq
199  %t0 = bitcast <8 x float> %c to <8 x i32>
200  %t1 = ashr <8 x i32> %t0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
201  %t2 = bitcast <8 x i32> %t1 to <8 x float>
202  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %t2)
203  %t4 = icmp ne i32 %t3, 0
204  %t5 = select i1 %t4, i32 %a, i32 %b
205  ret i32 %t5
206}
207
; PR62171: "is any lane of an 8 x float compare true?" written as an OR of the
; two 128-bit halves of the sext'd mask followed by a sign-bit movmsk-style
; test. Expected to collapse into a single 256-bit vcmpeqps + vtestps + sete
; with no extract/or/movmsk instructions.
208define i1 @PR62171(<8 x float> %a0, <8 x float> %a1) {
209; CHECK-LABEL: PR62171:
210; CHECK:       # %bb.0:
211; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm0
212; CHECK-NEXT:    vtestps %ymm0, %ymm0
213; CHECK-NEXT:    sete %al
214; CHECK-NEXT:    vzeroupper
215; CHECK-NEXT:    retq
216  %cmp = fcmp oeq <8 x float> %a0, %a1
217  %sext = sext <8 x i1> %cmp to <8 x i32>
218  %extract = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
219  %extract1 = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
220  %or = or <4 x i32> %extract, %extract1
221  %or1 = bitcast <4 x i32> %or to <16 x i8>
222  %msk = icmp slt <16 x i8> %or1, zeroinitializer
223  %msk1 = bitcast <16 x i1> %msk to i16
224  %not = icmp eq i16 %msk1, 0
225  ret i1 %not
226}
227
; testz(~x,~x) with the result only feeding a branch whose successors both just
; return: the test survives but the NOT is folded into the vtestps operands.
; The run lines differ only in how the all-ones vector is materialized:
; AVX1 uses vxorps + vcmptrueps, AVX2 uses vpcmpeqd ymm,ymm,ymm.
228define void @combine_testp_v8f32(<8 x i32> %x){
229; AVX-LABEL: combine_testp_v8f32:
230; AVX:       # %bb.0: # %entry
231; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
232; AVX-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
233; AVX-NEXT:    vtestps %ymm1, %ymm0
234; AVX-NEXT:    vzeroupper
235; AVX-NEXT:    retq
236;
237; AVX2-LABEL: combine_testp_v8f32:
238; AVX2:       # %bb.0: # %entry
239; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
240; AVX2-NEXT:    vtestps %ymm1, %ymm0
241; AVX2-NEXT:    vzeroupper
242; AVX2-NEXT:    retq
243entry:
244  %xor.i.i.i.i.i.i.i.i.i = xor <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
245  %.cast.i.i.i.i.i.i = bitcast <8 x i32> %xor.i.i.i.i.i.i.i.i.i to <8 x float>
246  %0 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %.cast.i.i.i.i.i.i, <8 x float> %.cast.i.i.i.i.i.i)
247  %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
248  br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end
249
250if.end3.i.i.i.i.i.i:                              ; preds = %entry
251  ret void
252
253end: ; preds = %entry
254  ret void
255}
256
; PR88958: the loaded operand of a commutative testz can be folded as the
; vtestps memory operand, (%rdi).
; NOTE(review): this file's RUN lines define only CHECK/AVX/AVX2 prefixes — the
; SSE check lines below are never matched and look like leftovers from the
; combine-ptest.ll sibling test; confirm before relying on them.
257define i32 @PR88958_1(ptr %0, <4 x float> %1) {
258; SSE-LABEL: PR88958_1:
259; SSE:       # %bb.0:
260; SSE-NEXT:    xorl %eax, %eax
261; SSE-NEXT:    ptest (%rdi), %xmm0
262; SSE-NEXT:    sete %al
263; SSE-NEXT:    retq
264;
265; CHECK-LABEL: PR88958_1:
266; CHECK:       # %bb.0:
267; CHECK-NEXT:    xorl %eax, %eax
268; CHECK-NEXT:    vtestps (%rdi), %xmm0
269; CHECK-NEXT:    sete %al
270; CHECK-NEXT:    retq
271  %3 = load <4 x float>, ptr %0
272  %4 = tail call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %3, <4 x float> %1)
273  ret i32 %4
274}
275
; PR88958: testc is NOT commutative, so the load feeding the first operand must
; stay in a register (vmovaps + vtestps) rather than being folded/commuted into
; a memory operand.
; NOTE(review): the SSE check lines have no matching RUN prefix in this file;
; they appear vestigial — confirm against combine-ptest.ll.
276define i32 @PR88958_2(ptr %0, <4 x float> %1) {
277; SSE-LABEL: PR88958_2:
278; SSE:       # %bb.0:
279; SSE-NEXT:    movdqa (%rdi), %xmm1
280; SSE-NEXT:    xorl %eax, %eax
281; SSE-NEXT:    ptest %xmm0, %xmm1
282; SSE-NEXT:    setb %al
283; SSE-NEXT:    retq
284;
285; CHECK-LABEL: PR88958_2:
286; CHECK:       # %bb.0:
287; CHECK-NEXT:    vmovaps (%rdi), %xmm1
288; CHECK-NEXT:    xorl %eax, %eax
289; CHECK-NEXT:    vtestps %xmm0, %xmm1
290; CHECK-NEXT:    setb %al
291; CHECK-NEXT:    retq
292  %3 = load <4 x float>, ptr %0
293  %4 = tail call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %3, <4 x float> %1)
294  ret i32 %4
295}
296
; PR88958: 256-bit commutative testz — the loaded operand folds into the
; vtestps memory operand, (%rdi).
; FIX(review): the SSE-LABEL below said "PR88958_1" — a stale copy-paste from
; the 128-bit test — and is corrected to match this function. Note this file's
; RUN lines define only CHECK/AVX/AVX2, so the SSE lines are never matched;
; confirm against combine-ptest.ll where this block likely originated.
297define i32 @PR88958_3(ptr %0, <8 x float> %1) {
298; SSE-LABEL: PR88958_3:
299; SSE:       # %bb.0:
300; SSE-NEXT:    xorl %eax, %eax
301; SSE-NEXT:    ptest (%rdi), %xmm0
302; SSE-NEXT:    sete %al
303; SSE-NEXT:    retq
304;
305; CHECK-LABEL: PR88958_3:
306; CHECK:       # %bb.0:
307; CHECK-NEXT:    xorl %eax, %eax
308; CHECK-NEXT:    vtestps (%rdi), %ymm0
309; CHECK-NEXT:    sete %al
310; CHECK-NEXT:    vzeroupper
311; CHECK-NEXT:    retq
312  %3 = load <8 x float>, ptr %0
313  %4 = tail call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %3, <8 x float> %1)
314  ret i32 %4
315}
316
; PR88958: 256-bit non-commutative testc — the load must stay in a register
; (vmovaps + vtestps), not be folded into a memory operand.
; FIX(review): the SSE-LABEL below said "PR88958_2" — a stale copy-paste from
; the 128-bit test — and is corrected to match this function. The SSE prefix is
; not exercised by this file's RUN lines, so these lines are inert; confirm
; against combine-ptest.ll where this block likely originated.
317define i32 @PR88958_4(ptr %0, <8 x float> %1) {
318; SSE-LABEL: PR88958_4:
319; SSE:       # %bb.0:
320; SSE-NEXT:    movdqa (%rdi), %xmm1
321; SSE-NEXT:    xorl %eax, %eax
322; SSE-NEXT:    ptest %xmm0, %xmm1
323; SSE-NEXT:    setb %al
324; SSE-NEXT:    retq
325;
326; CHECK-LABEL: PR88958_4:
327; CHECK:       # %bb.0:
328; CHECK-NEXT:    vmovaps (%rdi), %ymm1
329; CHECK-NEXT:    xorl %eax, %eax
330; CHECK-NEXT:    vtestps %ymm0, %ymm1
331; CHECK-NEXT:    setb %al
332; CHECK-NEXT:    vzeroupper
333; CHECK-NEXT:    retq
334  %3 = load <8 x float>, ptr %0
335  %4 = tail call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %3, <8 x float> %1)
336  ret i32 %4
337}
338
; Declarations of the AVX VTESTPS intrinsics exercised above: Z (ZF), C (CF)
; and NZC (neither) variants, in 128-bit and 256-bit widths.
339declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
340declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
341declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
342
343declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
344declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
345declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
346