; xref: /llvm-project/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll (revision 19f657d55d679cc3949e9e4c1a5bf76cc4c031b1)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

; Bitcast a zero-extended scalar i2 into a <2 x i1> mask vector.
define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i2_2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i2_2i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i2_2i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i2_2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  ret <2 x i1> %1
}

; Bitcast a zero-extended scalar i4 into a <4 x i1> mask vector.
define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i4_4i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i4_4i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i4_4i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i4_4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  ret <4 x i1> %1
}

; Bitcast a zero-extended scalar i8 into an <8 x i1> mask vector.
define <8 x i1> @bitcast_i8_8i1(i8 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i8_8i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i8_8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i8_8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i8_8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  ret <8 x i1> %1
}

; PR54911
; Same as bitcast_i8_8i1, but with a freeze on the result vector.
define <8 x i1> @bitcast_i8_8i1_freeze(i8 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i8_8i1_freeze:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i8_8i1_freeze:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i8_8i1_freeze:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i8_8i1_freeze:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = freeze <8 x i1> %1
  ret <8 x i1> %2
}

; Bitcast a zero-extended scalar i16 into a <16 x i1> mask vector.
define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; SSE2-LABEL: bitcast_i16_16i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: bitcast_i16_16i1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i16_16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i16_16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i16_16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  ret <16 x i1> %1
}

; Bitcast a scalar i32 into a <32 x i1> mask vector. Pre-AVX targets
; return <32 x i1> indirectly via sret (%rdi), hence the memory store.
define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i32_32i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %rax
; SSE2-SSSE3-NEXT:    movl %esi, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_i32_32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_i32_32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_i32_32i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  ret <32 x i1> %1
}

; Bitcast a scalar i64 into a <64 x i1> mask vector. Non-AVX512BW targets
; return <64 x i1> indirectly via sret (%rdi), hence the memory store.
define <64 x i1> @bitcast_i64_64i1(i64 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i64_64i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %rax
; SSE2-SSSE3-NEXT:    movq %rsi, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_i64_64i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    movq %rdi, %rax
; AVX12-NEXT:    movq %rsi, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_i64_64i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  ret <64 x i1> %1
}
