xref: /llvm-project/llvm/test/CodeGen/X86/broadcastm-lowering.ll (revision cf9b1f7a0e9da5d019a8bea853f3cff85d808d18)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
8; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
9; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
14
; Splat of an 8-bit compare mask into both i64 lanes of a <2 x i64> result.
; The checks below expect vpbroadcastmb2q under every RUN configuration; only
; the way the k-register mask is produced differs between them (sign-extend
; plus vptestmq with AVX512CD alone, a zmm-widened vpcmpeqw once AVX512BW is
; added, and a direct xmm vpcmpeqw once AVX512VL is enabled as well).
15define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
16; AVX512CD-LABEL: test_mm_epi64:
17; AVX512CD:       # %bb.0: # %entry
18; AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
19; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
20; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
21; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
22; AVX512CD-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
23; AVX512CD-NEXT:    vzeroupper
24; AVX512CD-NEXT:    ret{{[l|q]}}
25;
26; AVX512CDBW-LABEL: test_mm_epi64:
27; AVX512CDBW:       # %bb.0: # %entry
28; AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
29; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
30; AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
31; AVX512CDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
32; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
33; AVX512CDBW-NEXT:    vzeroupper
34; AVX512CDBW-NEXT:    ret{{[l|q]}}
35;
36; AVX512VLCDBW-LABEL: test_mm_epi64:
37; AVX512VLCDBW:       # %bb.0: # %entry
38; AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
39; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %xmm0
40; AVX512VLCDBW-NEXT:    ret{{[l|q]}}
41entry:
  ; Lane-wise equality yields an <8 x i1> mask, reinterpreted as a scalar i8.
42  %0 = icmp eq <8 x i16> %a, %b
43  %1 = bitcast <8 x i1> %0 to i8
  ; Zero-extend the mask to the destination element width.
44  %conv.i = zext i8 %1 to i64
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
45  %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
46  %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
47  ret <2 x i64> %vecinit1.i.i
48}
49
; Splat of a 16-bit compare mask into all four i32 lanes of a <4 x i32> result.
; With AVX512CD alone the checks expect no mask broadcast at all: the byte
; compare stays in xmm, vpmovmskb extracts the mask to a GPR, and vmovd plus
; vpbroadcastd splat it. Once AVX512BW is available the checks expect
; vpbroadcastmw2d (on zmm without AVX512VL, on xmm with it).
50define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
51; AVX512CD-LABEL: test_mm_epi32:
52; AVX512CD:       # %bb.0: # %entry
53; AVX512CD-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
54; AVX512CD-NEXT:    vpmovmskb %xmm0, %eax
55; AVX512CD-NEXT:    vmovd %eax, %xmm0
56; AVX512CD-NEXT:    vpbroadcastd %xmm0, %xmm0
57; AVX512CD-NEXT:    ret{{[l|q]}}
58;
59; AVX512CDBW-LABEL: test_mm_epi32:
60; AVX512CDBW:       # %bb.0: # %entry
61; AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
62; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
63; AVX512CDBW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
64; AVX512CDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
65; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
66; AVX512CDBW-NEXT:    vzeroupper
67; AVX512CDBW-NEXT:    ret{{[l|q]}}
68;
69; AVX512VLCDBW-LABEL: test_mm_epi32:
70; AVX512VLCDBW:       # %bb.0: # %entry
71; AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
72; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
73; AVX512VLCDBW-NEXT:    ret{{[l|q]}}
74entry:
  ; Lane-wise equality yields a <16 x i1> mask, reinterpreted as a scalar i16.
75  %0 = icmp eq <16 x i8> %a, %b
76  %1 = bitcast <16 x i1> %0 to i16
  ; Zero-extend the mask to the destination element width.
77  %conv.i = zext i16 %1 to i32
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
78  %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
79  %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
80  ret <4 x i32> %vecinit3.i.i
81}
82
; Splat of a 16-bit compare mask into all sixteen i32 lanes of a <16 x i32>
; result. The operands are already 512 bits wide, so a single shared set of
; checks covers every RUN configuration: vpcmpeqd into a k-register followed
; by vpbroadcastmw2d on zmm.
83define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
84; ALL-LABEL: test_mm512_epi32:
85; ALL:       # %bb.0: # %entry
86; ALL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
87; ALL-NEXT:    vpbroadcastmw2d %k0, %zmm0
88; ALL-NEXT:    ret{{[l|q]}}
89entry:
  ; Lane-wise equality yields a <16 x i1> mask, reinterpreted as a scalar i16.
90  %0 = icmp eq <16 x i32> %a, %b
91  %1 = bitcast <16 x i1> %0 to i16
  ; Zero-extend the mask to the destination element width.
92  %conv.i = zext i16 %1 to i32
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
93  %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
94  %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
95  ret <16 x i32> %vecinit15.i.i
96}
97
; Splat of an 8-bit compare mask into all eight i64 lanes of an <8 x i64>
; result. Every configuration is expected to end in vpbroadcastmb2q on zmm;
; without AVX512VL the 256-bit compare operands are widened to zmm first,
; while with AVX512VL the compare is done directly on ymm.
98define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
99; AVX512CD-LABEL: test_mm512_epi64:
100; AVX512CD:       # %bb.0: # %entry
101; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
102; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
103; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
104; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
105; AVX512CD-NEXT:    ret{{[l|q]}}
106;
107; AVX512CDBW-LABEL: test_mm512_epi64:
108; AVX512CDBW:       # %bb.0: # %entry
109; AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
110; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
111; AVX512CDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
112; AVX512CDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
113; AVX512CDBW-NEXT:    ret{{[l|q]}}
114;
115; AVX512VLCDBW-LABEL: test_mm512_epi64:
116; AVX512VLCDBW:       # %bb.0: # %entry
117; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
118; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
119; AVX512VLCDBW-NEXT:    ret{{[l|q]}}
120entry:
  ; Lane-wise equality yields an <8 x i1> mask, reinterpreted as a scalar i8.
121  %0 = icmp eq <8 x i32> %a, %b
122  %1 = bitcast <8 x i1> %0 to i8
  ; Zero-extend the mask to the destination element width.
123  %conv.i = zext i8 %1 to i64
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
124  %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
125  %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
126  ret <8 x i64> %vecinit7.i.i
127}
128
; Splat of an 8-bit compare mask into all four i64 lanes of a <4 x i64>
; result. Every configuration is expected to use vpbroadcastmb2q; without
; AVX512VL both the compare and the broadcast run on zmm and the low ymm is
; returned, while with AVX512VL both stay on ymm.
129define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
130; AVX512CD-LABEL: test_mm256_epi64:
131; AVX512CD:       # %bb.0: # %entry
132; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
133; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
134; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
135; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
136; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
137; AVX512CD-NEXT:    ret{{[l|q]}}
138;
139; AVX512CDBW-LABEL: test_mm256_epi64:
140; AVX512CDBW:       # %bb.0: # %entry
141; AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
142; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
143; AVX512CDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
144; AVX512CDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
145; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
146; AVX512CDBW-NEXT:    ret{{[l|q]}}
147;
148; AVX512VLCDBW-LABEL: test_mm256_epi64:
149; AVX512VLCDBW:       # %bb.0: # %entry
150; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
151; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %ymm0
152; AVX512VLCDBW-NEXT:    ret{{[l|q]}}
153entry:
  ; Lane-wise equality yields an <8 x i1> mask, reinterpreted as a scalar i8.
154  %0 = icmp eq <8 x i32> %a, %b
155  %1 = bitcast <8 x i1> %0 to i8
  ; Zero-extend the mask to the destination element width.
156  %conv.i = zext i8 %1 to i64
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
157  %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
158  %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
159  ret <4 x i64> %vecinit3.i.i
160}
161
; Splat of a 16-bit compare mask into all eight i32 lanes of an <8 x i32>
; result. Every configuration is expected to end in vpbroadcastmw2d. With
; AVX512CD alone the word compare is done on ymm, sign-extended to zmm and
; turned into a k-register mask by vptestmd; with AVX512BW the compare writes
; the k-register directly (on zmm without AVX512VL, on ymm with it).
162define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
163; AVX512CD-LABEL: test_mm256_epi32:
164; AVX512CD:       # %bb.0: # %entry
165; AVX512CD-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
166; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
167; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
168; AVX512CD-NEXT:    vpbroadcastmw2d %k0, %zmm0
169; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
170; AVX512CD-NEXT:    ret{{[l|q]}}
171;
172; AVX512CDBW-LABEL: test_mm256_epi32:
173; AVX512CDBW:       # %bb.0: # %entry
174; AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
175; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
176; AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
177; AVX512CDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
178; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
179; AVX512CDBW-NEXT:    ret{{[l|q]}}
180;
181; AVX512VLCDBW-LABEL: test_mm256_epi32:
182; AVX512VLCDBW:       # %bb.0: # %entry
183; AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
184; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
185; AVX512VLCDBW-NEXT:    ret{{[l|q]}}
186entry:
  ; Lane-wise equality yields a <16 x i1> mask, reinterpreted as a scalar i16.
187  %0 = icmp eq <16 x i16> %a, %b
188  %1 = bitcast <16 x i1> %0 to i16
  ; Zero-extend the mask to the destination element width.
189  %conv.i = zext i16 %1 to i32
  ; Insert into lane 0, then splat it with an all-zero shuffle mask.
190  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
191  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
192  ret <8 x i32> %vecinit7.i.i
193}
194
195