; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512

; Ensure canonicalizeShuffleWithBinOps doesn't merge binops with different types

; Don't merge PCMPGT nodes of different types (a worked example follows the function)
define <4 x i32> @dont_merge_pcmpgt(<16 x i8> %0, <4 x i32> %1) {
; SSE-LABEL: dont_merge_pcmpgt:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: dont_merge_pcmpgt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT:    retq
  %3 = icmp sgt <16 x i8> %0, zeroinitializer
  %4 = sext <16 x i1> %3 to <16 x i8>
  %5 = bitcast <16 x i8> %4 to <4 x i32>
  %6 = icmp sgt <4 x i32> %1, zeroinitializer
  %7 = sext <4 x i1> %6 to <4 x i32>
  %8 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %8
}
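; Worked example of why the merge would miscompile (a sketch, not part of the
; autogenerated checks): folding the blend through the compares leaves a single
; PCMPGT of one element width, so the lanes taken from %0 would be compared as
; dwords instead of bytes. E.g. if the low dword of %0 is 0x00000001, pcmpgtb
; against zero yields bytes <0xFF,0x00,0x00,0x00> = dword 0x000000FF, but
; pcmpgtd on the same bits yields 0xFFFFFFFF, since 0x00000001 > 0 as an i32.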

; FIXME: OK to merge logic nodes of different types (see the sketch below)
define <4 x i32> @merge_and(<16 x i8> %0, <4 x i32> %1) {
; SSE-LABEL: merge_and:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX2-LABEL: merge_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: merge_and:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX512-NEXT:    retq
  %3 = and <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %4 = bitcast <16 x i8> %3 to <4 x i32>
  %5 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %6 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %6
}
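; Sketch of the merged form the FIXME asks for (hypothetical, not generated
; by this test): the shuffle can be pushed through both ANDs, after which the
; two mask constants fold at compile time, leaving a single AND. The names
; %bc/%m/%r are illustrative; 16843009 is 0x01010101, the <16 x i8> splat of
; 1 reinterpreted as i32:
;   %bc = bitcast <16 x i8> %0 to <4 x i32>
;   %m = shufflevector <4 x i32> %bc, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
;   %r = and <4 x i32> %m, <i32 16843009, i32 16843009, i32 16843009, i32 1>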