; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW

;
; 128-bit vectors
;

; Expand the low 2 bits of %a0 into a <2 x i64> vector of 0/1 values.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Expand the low 4 bits of %a0 into a <4 x i32> vector of 0/1 values.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Expand the 8 bits of %a0 into an <8 x i16> vector of 0/1 values.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Expand the 16 bits of %a0 into a <16 x i8> vector of 0/1 values.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Expand the low 4 bits of %a0 into a <4 x i64> vector of 0/1 values.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,4,8]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Expand the 8 bits of %a0 into an <8 x i32> vector of 0/1 values.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Expand the 16 bits of %a0 into a <16 x i16> vector of 0/1 values.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Expand the 32 bits of %a0 into a <32 x i8> vector of 0/1 values.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    # xmm2 = mem[0,0]
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*#+}} ymm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; Expand the 8 bits of %a0 into an <8 x i64> vector of 0/1 values.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm2
; AVX1-NEXT:    vpcmpeqq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [16,32,64,128]
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm1
; AVX2-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512VLBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

; Expand the 16 bits of %a0 into a <16 x i32> vector of 0/1 values.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT:    vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm1
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

; Expand the 32 bits of %a0 into a <32 x i16> vector of 0/1 values.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm1 {%k2} {z} = -1
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

; Bitcast a scalar i64 bit-mask to <64 x i1>, then zero-extend each bit to an
; i8 lane (element i == bit i of %a0, value 0 or 1).
; Expected lowerings (per the autogenerated checks below):
;  - SSE2/SSSE3 and AVX1/AVX2: broadcast/shuffle each 8-bit chunk across a
;    vector, test each lane's bit with pand+pcmpeqb against a power-of-two
;    constant, then pand with an all-ones-byte mask to reduce -1 to 1.
;  - AVX512F (no BW): split the i64 into four 16-bit mask-register pieces and
;    build the result via masked vmovdqa32 + vpmovdb truncation.
;  - AVX512VL+BW: a single kmovq + masked vmovdqu8 of an all-ones constant.
; NOTE(review): check lines are generated by update_llc_test_checks.py — do
; not hand-edit them; rerun the script after any codegen change.
749define <64 x i8> @ext_i64_64i8(i64 %a0) {
750; SSE2-SSSE3-LABEL: ext_i64_64i8:
751; SSE2-SSSE3:       # %bb.0:
752; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
753; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
754; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
755; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
756; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
757; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
758; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
759; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
760; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
761; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
762; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
763; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
764; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
765; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
766; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
767; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
768; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
769; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
770; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
771; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
772; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
773; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
774; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
775; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
776; SSE2-SSSE3-NEXT:    retq
777;
778; AVX1-LABEL: ext_i64_64i8:
779; AVX1:       # %bb.0:
780; AVX1-NEXT:    vmovq %rdi, %xmm0
781; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
782; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
783; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
784; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
785; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
786; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
787; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
788; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
789; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
790; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
791; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
792; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
793; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
794; AVX1-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
795; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
796; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
797; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
798; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
799; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
800; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm4, %xmm4
801; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
802; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
803; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
804; AVX1-NEXT:    retq
805;
806; AVX2-LABEL: ext_i64_64i8:
807; AVX2:       # %bb.0:
808; AVX2-NEXT:    vmovq %rdi, %xmm0
809; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
810; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
811; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
812; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
813; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
814; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
815; AVX2-NEXT:    vpand %ymm3, %ymm0, %ymm0
816; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
817; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
818; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
819; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
820; AVX2-NEXT:    retq
821;
822; AVX512F-LABEL: ext_i64_64i8:
823; AVX512F:       # %bb.0:
824; AVX512F-NEXT:    movq %rdi, %rax
825; AVX512F-NEXT:    movl %edi, %ecx
826; AVX512F-NEXT:    kmovw %edi, %k1
827; AVX512F-NEXT:    shrq $32, %rdi
828; AVX512F-NEXT:    shrq $48, %rax
829; AVX512F-NEXT:    shrl $16, %ecx
830; AVX512F-NEXT:    kmovw %ecx, %k2
831; AVX512F-NEXT:    kmovw %eax, %k3
832; AVX512F-NEXT:    kmovw %edi, %k4
833; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
834; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k4} {z}
835; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
836; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k3} {z}
837; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
838; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
839; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
840; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
841; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k2} {z}
842; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
843; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
844; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
845; AVX512F-NEXT:    retq
846;
847; AVX512VLBW-LABEL: ext_i64_64i8:
848; AVX512VLBW:       # %bb.0:
849; AVX512VLBW-NEXT:    kmovq %rdi, %k1
850; AVX512VLBW-NEXT:    vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
851; AVX512VLBW-NEXT:    retq
; IR under test: bit i of %a0 -> element i of the <64 x i8> result (0 or 1).
852  %1 = bitcast i64 %a0 to <64 x i1>
853  %2 = zext <64 x i1> %1 to <64 x i8>
854  ret <64 x i8> %2
855}
856