; xref: /llvm-project/llvm/test/CodeGen/X86/vector-zext.ll (revision b5d35feacb7246573c6a4ab2bddc4919a4228ed5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

; Zero-extend the low 8 bytes of a <16 x i8> to <8 x i16>.
; With sse4.1 or later this selects a single pmovzxbw/vpmovzxbw; older
; subtargets interleave with a zeroed register via punpcklbw.
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

; PR17654
; Full-width zero-extension <16 x i8> -> <16 x i16>: avx2 and later fold this
; into one vpmovzxbw of the whole vector; SSE splits into low/high unpacks
; against a zeroed register.
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_16i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}

; Zero-extension <32 x i8> -> <32 x i16> (512-bit result): only avx512bw can
; emit one full-width vpmovzxbw; other subtargets split into 128/256-bit
; halves.
define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
; SSE2-LABEL: zext_32i8_to_32i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_32i8_to_32i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_32i8_to_32i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_32i8_to_32i16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_32i8_to_32i16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: zext_32i8_to_32i16:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    retq
entry:
  %B = zext <32 x i8> %A to <32 x i16>
  ret <32 x i16> %B
}

; Zero-extend the low 4 bytes of a <16 x i8> to <4 x i32>: pmovzxbd on
; sse4.1+; older SSE chains two interleaves with zero (byte->word->dword).
define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}

; Zero-extend the low 8 bytes of a <16 x i8> to <8 x i32> (256-bit result):
; avx2+ uses one wide vpmovzxbd; narrower subtargets produce two 128-bit
; halves.
define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}

; Full-width zero-extension <16 x i8> -> <16 x i32> (512-bit result): avx512
; emits a single zmm vpmovzxbd; narrower subtargets split into quarters/halves.
define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_16i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_16i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_16i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_16i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_16i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i8> %A to <16 x i32>
  ret <16 x i32> %B
}

; Zero-extend the low 2 bytes of a <16 x i8> to <2 x i64>: pmovzxbq on
; sse4.1+, a single pshufb on ssse3, three zero-interleaves on sse2.
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}

; Zero-extend the low 4 bytes of a <16 x i8> to <4 x i64> (256-bit result):
; avx2+ uses a single ymm vpmovzxbq; sse4.1/avx1 split into two halves via a
; psrld shift of the source.
define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}

; Zero-extend the low 8 bytes of a <16 x i8> to <8 x i64> (512-bit result):
; avx512 emits one zmm vpmovzxbq; narrower subtargets build the result from
; shifted/shuffled copies of the source, two elements at a time.
define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrlq $48, %xmm0
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i64>
  ret <8 x i64> %C
}

; Zero-extend the low 4 words of an <8 x i16> to <4 x i32>: pmovzxwd on
; sse4.1+; older SSE interleaves with a zeroed register via punpcklwd.
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}

; Zero-extend a full v8i16 to v8i32 (result spans two 128-bit halves).
; SSE targets split into low/high unpacks against zero; AVX2/AVX-512 use one
; 256-bit vpmovzxwd.
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}
545
; Zero-extend v16i16 to v16i32 (four 128-bit result registers on SSE, two
; 256-bit on AVX2, one 512-bit vpmovzxwd on AVX-512).
define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i16_to_16i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i16_to_16i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i16_to_16i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i16_to_16i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i16_to_16i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i16_to_16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i16> %A to <16 x i32>
  ret <16 x i32> %B
}
611
; Take the low 2 of 8 i16 lanes and zero-extend them to i64. SSE2/SSSE3 need
; two interleaves with zero (word then dword); SSE4.1+ fold it into pmovzxwq.
define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}
641
; Take the low 4 of 8 i16 lanes and zero-extend them to i64 (v4i64 result).
; AVX2/AVX-512 cover all four lanes with a single 256-bit vpmovzxwq.
define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}
693
; Zero-extend a full v8i16 to v8i64 (512-bit result). SSE needs four output
; registers built via word/dword interleaves or pshufd + pmovzxwq; AVX-512
; handles all eight lanes with one vpmovzxwq into zmm0.
define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i64>
  ret <8 x i64> %B
}
767
; Take the low 2 of 4 i32 lanes and zero-extend them to i64. Pre-SSE4.1
; targets use an unpcklps against a zeroed register; SSE4.1+ use pmovzxdq.
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}
795
; Zero-extend a full v4i32 to v4i64. SSE targets split into low/high unpacks
; against zero; AVX2/AVX-512 use a single 256-bit vpmovzxdq.
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm0, %xmm1
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_4i32_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}
842
; Zero-extend v8i32 to v8i64 (four 128-bit result registers on SSE, two
; 256-bit on AVX2, one 512-bit vpmovzxdq on AVX-512).
define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i32_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm1, %xmm3
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    xorps %xmm4, %xmm4
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movaps %xmm3, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i32_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm1, %xmm3
; SSSE3-NEXT:    movaps %xmm0, %xmm1
; SSSE3-NEXT:    xorps %xmm4, %xmm4
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movaps %xmm3, %xmm2
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i32_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i32_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i32_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i32_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i32> %A to <8 x i64>
  ret <8 x i64>%B
}
908
; Load <2 x i8> from memory and zero-extend to <2 x i64>. SSE2 goes through a
; movzwl scalar load plus three interleaves; SSE4.1+ fold the load into
; pmovzxbq directly from memory.
define <2 x i64> @load_zext_2i8_to_2i64(ptr%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movzwl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i8>, ptr %ptr
 %Y = zext <2 x i8> %X to <2 x i64>
 ret <2 x i64> %Y
}
941
; Load <4 x i8> from memory and zero-extend to <4 x i32>. SSE4.1+ fold the
; load into pmovzxbd; earlier targets load via movd and interleave with zero.
define <4 x i32> @load_zext_4i8_to_4i32(ptr%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <4 x i8>, ptr %ptr
 %Y = zext <4 x i8> %X to <4 x i32>
 ret <4 x i32> %Y
}
973
; Load <4 x i8> from memory and zero-extend to <4 x i64>. SSSE3 uses pshufb
; masks; SSE4.1/AVX1 use two folded pmovzxbq loads; AVX2/AVX-512 use one
; 256-bit vpmovzxbq from memory.
define <4 x i64> @load_zext_4i8_to_4i64(ptr%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i8_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i8>, ptr %ptr
 %Y = zext <4 x i8> %X to <4 x i64>
 ret <4 x i64> %Y
}
1021
; Load <8 x i8> from memory and zero-extend to <8 x i16>. SSE2/SSSE3 use a
; movq load plus punpcklbw with zero; SSE4.1+ fold the load into pmovzxbw.
define <8 x i16> @load_zext_8i8_to_8i16(ptr%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    retq
entry:
 %X = load <8 x i8>, ptr %ptr
 %Y = zext <8 x i8> %X to <8 x i16>
 ret <8 x i16> %Y
}
1051
; Load <8 x i8> from memory and zero-extend to <8 x i32>. SSE4.1/AVX1 use two
; folded pmovzxbd loads; AVX2/AVX-512 use one 256-bit vpmovzxbd from memory.
define <8 x i32> @load_zext_8i8_to_8i32(ptr%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, ptr %ptr
 %Y = zext <8 x i8> %X to <8 x i32>
 ret <8 x i32> %Y
}
1100
; Load <16 x i8>, take the low 8 lanes via shufflevector, and zero-extend to
; <8 x i32>. Unlike load_zext_8i8_to_8i32 the full 16-byte vector is loaded
; first, so SSE4.1/AVX1 extend from a register (with a pshufd for the high
; half) instead of folding the load; AVX2/AVX-512 still narrow it to one
; vpmovzxbd from memory.
define <8 x i32> @load_zext_16i8_to_8i32(ptr%ptr) {
; SSE2-LABEL: load_zext_16i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa (%rdi), %xmm1
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, ptr %ptr
 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %Z = zext <8 x i8> %Y to <8 x i32>
 ret <8 x i32> %Z
}
1154
; Load <8 x i8> and zext to <8 x i64> (8x widening, four xmm results pre-AVX).
; AVX512 should collapse the whole extension into a single zmm vpmovzxbq from memory.
1155define <8 x i64> @load_zext_8i8_to_8i64(ptr%ptr) {
1156; SSE2-LABEL: load_zext_8i8_to_8i64:
1157; SSE2:       # %bb.0: # %entry
1158; SSE2-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
1159; SSE2-NEXT:    pxor %xmm4, %xmm4
1160; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1161; SSE2-NEXT:    movdqa %xmm3, %xmm1
1162; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1163; SSE2-NEXT:    movdqa %xmm1, %xmm0
1164; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1165; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1166; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1167; SSE2-NEXT:    movdqa %xmm3, %xmm2
1168; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1169; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1170; SSE2-NEXT:    retq
1171;
1172; SSSE3-LABEL: load_zext_8i8_to_8i64:
1173; SSSE3:       # %bb.0: # %entry
1174; SSSE3-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
1175; SSSE3-NEXT:    movdqa %xmm3, %xmm0
1176; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1177; SSSE3-NEXT:    movdqa %xmm3, %xmm1
1178; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
1179; SSSE3-NEXT:    movdqa %xmm3, %xmm2
1180; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
1181; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
1182; SSSE3-NEXT:    retq
1183;
1184; SSE41-LABEL: load_zext_8i8_to_8i64:
1185; SSE41:       # %bb.0: # %entry
1186; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1187; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1188; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1189; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1190; SSE41-NEXT:    retq
1191;
1192; AVX1-LABEL: load_zext_8i8_to_8i64:
1193; AVX1:       # %bb.0: # %entry
1194; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1195; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1199; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1200; AVX1-NEXT:    retq
1201;
1202; AVX2-LABEL: load_zext_8i8_to_8i64:
1203; AVX2:       # %bb.0: # %entry
1204; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1205; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1206; AVX2-NEXT:    retq
1207;
1208; AVX512-LABEL: load_zext_8i8_to_8i64:
1209; AVX512:       # %bb.0: # %entry
1210; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1211; AVX512-NEXT:    retq
1212entry:
1213 %X = load <8 x i8>, ptr %ptr
1214 %Y = zext <8 x i8> %X to <8 x i64>
1215 ret <8 x i64> %Y
1216}
1217
; Load <16 x i8> and zext to <16 x i16> (2x widening across a full ymm result).
; SSE uses an unpack-lo/unpack-hi pair with zero; AVX2/AVX512 use one ymm vpmovzxbw.
1218define <16 x i16> @load_zext_16i8_to_16i16(ptr%ptr) {
1219; SSE2-LABEL: load_zext_16i8_to_16i16:
1220; SSE2:       # %bb.0: # %entry
1221; SSE2-NEXT:    movdqa (%rdi), %xmm1
1222; SSE2-NEXT:    pxor %xmm2, %xmm2
1223; SSE2-NEXT:    movdqa %xmm1, %xmm0
1224; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1225; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1226; SSE2-NEXT:    retq
1227;
1228; SSSE3-LABEL: load_zext_16i8_to_16i16:
1229; SSSE3:       # %bb.0: # %entry
1230; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1231; SSSE3-NEXT:    pxor %xmm2, %xmm2
1232; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1233; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1234; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1235; SSSE3-NEXT:    retq
1236;
1237; SSE41-LABEL: load_zext_16i8_to_16i16:
1238; SSE41:       # %bb.0: # %entry
1239; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1240; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1241; SSE41-NEXT:    retq
1242;
1243; AVX1-LABEL: load_zext_16i8_to_16i16:
1244; AVX1:       # %bb.0: # %entry
1245; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1246; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1247; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1248; AVX1-NEXT:    retq
1249;
1250; AVX2-LABEL: load_zext_16i8_to_16i16:
1251; AVX2:       # %bb.0: # %entry
1252; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1253; AVX2-NEXT:    retq
1254;
1255; AVX512-LABEL: load_zext_16i8_to_16i16:
1256; AVX512:       # %bb.0: # %entry
1257; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1258; AVX512-NEXT:    retq
1259entry:
1260 %X = load <16 x i8>, ptr %ptr
1261 %Y = zext <16 x i8> %X to <16 x i16>
1262 ret <16 x i16> %Y
1263}
1264
; Load <2 x i16> (a single 32-bit movd on SSE2) and zext to <2 x i64>.
; SSE4.1 and all AVX targets should use a single pmovzxwq/vpmovzxwq from memory.
1265define <2 x i64> @load_zext_2i16_to_2i64(ptr%ptr) {
1266; SSE2-LABEL: load_zext_2i16_to_2i64:
1267; SSE2:       # %bb.0: # %entry
1268; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1269; SSE2-NEXT:    pxor %xmm1, %xmm1
1270; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1271; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1272; SSE2-NEXT:    retq
1273;
1274; SSSE3-LABEL: load_zext_2i16_to_2i64:
1275; SSSE3:       # %bb.0: # %entry
1276; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1277; SSSE3-NEXT:    pxor %xmm1, %xmm1
1278; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1279; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1280; SSSE3-NEXT:    retq
1281;
1282; SSE41-LABEL: load_zext_2i16_to_2i64:
1283; SSE41:       # %bb.0: # %entry
1284; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1285; SSE41-NEXT:    retq
1286;
1287; AVX-LABEL: load_zext_2i16_to_2i64:
1288; AVX:       # %bb.0: # %entry
1289; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1290; AVX-NEXT:    retq
1291entry:
1292 %X = load <2 x i16>, ptr %ptr
1293 %Y = zext <2 x i16> %X to <2 x i64>
1294 ret <2 x i64> %Y
1295}
1296
; Load <4 x i16> (64-bit movq on SSE2) and zext to <4 x i32>.
; SSE4.1+ selects a single pmovzxwd with a folded memory operand.
1297define <4 x i32> @load_zext_4i16_to_4i32(ptr%ptr) {
1298; SSE2-LABEL: load_zext_4i16_to_4i32:
1299; SSE2:       # %bb.0: # %entry
1300; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1301; SSE2-NEXT:    pxor %xmm1, %xmm1
1302; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1303; SSE2-NEXT:    retq
1304;
1305; SSSE3-LABEL: load_zext_4i16_to_4i32:
1306; SSSE3:       # %bb.0: # %entry
1307; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1308; SSSE3-NEXT:    pxor %xmm1, %xmm1
1309; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1310; SSSE3-NEXT:    retq
1311;
1312; SSE41-LABEL: load_zext_4i16_to_4i32:
1313; SSE41:       # %bb.0: # %entry
1314; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1315; SSE41-NEXT:    retq
1316;
1317; AVX-LABEL: load_zext_4i16_to_4i32:
1318; AVX:       # %bb.0: # %entry
1319; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1320; AVX-NEXT:    retq
1321entry:
1322 %X = load <4 x i16>, ptr %ptr
1323 %Y = zext <4 x i16> %X to <4 x i32>
1324 ret <4 x i32> %Y
1325}
1326
; Load <4 x i16> and zext to <4 x i64> (4x widening, two-xmm result pre-AVX2).
; AVX2/AVX512 fold everything into a single ymm vpmovzxwq from memory.
1327define <4 x i64> @load_zext_4i16_to_4i64(ptr%ptr) {
1328; SSE2-LABEL: load_zext_4i16_to_4i64:
1329; SSE2:       # %bb.0: # %entry
1330; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1331; SSE2-NEXT:    pxor %xmm2, %xmm2
1332; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1333; SSE2-NEXT:    movdqa %xmm1, %xmm0
1334; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1335; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1336; SSE2-NEXT:    retq
1337;
1338; SSSE3-LABEL: load_zext_4i16_to_4i64:
1339; SSSE3:       # %bb.0: # %entry
1340; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1341; SSSE3-NEXT:    pxor %xmm2, %xmm2
1342; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1343; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1344; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1345; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1346; SSSE3-NEXT:    retq
1347;
1348; SSE41-LABEL: load_zext_4i16_to_4i64:
1349; SSE41:       # %bb.0: # %entry
1350; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1351; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1352; SSE41-NEXT:    retq
1353;
1354; AVX1-LABEL: load_zext_4i16_to_4i64:
1355; AVX1:       # %bb.0: # %entry
1356; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1357; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1358; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1359; AVX1-NEXT:    retq
1360;
1361; AVX2-LABEL: load_zext_4i16_to_4i64:
1362; AVX2:       # %bb.0: # %entry
1363; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1364; AVX2-NEXT:    retq
1365;
1366; AVX512-LABEL: load_zext_4i16_to_4i64:
1367; AVX512:       # %bb.0: # %entry
1368; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1369; AVX512-NEXT:    retq
1370entry:
1371 %X = load <4 x i16>, ptr %ptr
1372 %Y = zext <4 x i16> %X to <4 x i64>
1373 ret <4 x i64> %Y
1374}
1375
; Load <8 x i16> and zext to <8 x i32>.
; SSE pairs punpcklwd/punpckhwd with zero; AVX2/AVX512 use one ymm vpmovzxwd from memory.
1376define <8 x i32> @load_zext_8i16_to_8i32(ptr%ptr) {
1377; SSE2-LABEL: load_zext_8i16_to_8i32:
1378; SSE2:       # %bb.0: # %entry
1379; SSE2-NEXT:    movdqa (%rdi), %xmm1
1380; SSE2-NEXT:    pxor %xmm2, %xmm2
1381; SSE2-NEXT:    movdqa %xmm1, %xmm0
1382; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1383; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1384; SSE2-NEXT:    retq
1385;
1386; SSSE3-LABEL: load_zext_8i16_to_8i32:
1387; SSSE3:       # %bb.0: # %entry
1388; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1389; SSSE3-NEXT:    pxor %xmm2, %xmm2
1390; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1391; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1392; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1393; SSSE3-NEXT:    retq
1394;
1395; SSE41-LABEL: load_zext_8i16_to_8i32:
1396; SSE41:       # %bb.0: # %entry
1397; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1398; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1399; SSE41-NEXT:    retq
1400;
1401; AVX1-LABEL: load_zext_8i16_to_8i32:
1402; AVX1:       # %bb.0: # %entry
1403; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1404; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1405; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1406; AVX1-NEXT:    retq
1407;
1408; AVX2-LABEL: load_zext_8i16_to_8i32:
1409; AVX2:       # %bb.0: # %entry
1410; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1411; AVX2-NEXT:    retq
1412;
1413; AVX512-LABEL: load_zext_8i16_to_8i32:
1414; AVX512:       # %bb.0: # %entry
1415; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1416; AVX512-NEXT:    retq
1417entry:
1418 %X = load <8 x i16>, ptr %ptr
1419 %Y = zext <8 x i16> %X to <8 x i32>
1420 ret <8 x i32> %Y
1421}
1422
; Load <2 x i32> and zext to <2 x i64>.
; SSE2 interleaves with zero via unpcklps; SSE4.1+ uses a single pmovzxdq from memory.
1423define <2 x i64> @load_zext_2i32_to_2i64(ptr%ptr) {
1424; SSE2-LABEL: load_zext_2i32_to_2i64:
1425; SSE2:       # %bb.0: # %entry
1426; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1427; SSE2-NEXT:    xorps %xmm1, %xmm1
1428; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1429; SSE2-NEXT:    retq
1430;
1431; SSSE3-LABEL: load_zext_2i32_to_2i64:
1432; SSSE3:       # %bb.0: # %entry
1433; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1434; SSSE3-NEXT:    xorps %xmm1, %xmm1
1435; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1436; SSSE3-NEXT:    retq
1437;
1438; SSE41-LABEL: load_zext_2i32_to_2i64:
1439; SSE41:       # %bb.0: # %entry
1440; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1441; SSE41-NEXT:    retq
1442;
1443; AVX-LABEL: load_zext_2i32_to_2i64:
1444; AVX:       # %bb.0: # %entry
1445; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1446; AVX-NEXT:    retq
1447entry:
1448 %X = load <2 x i32>, ptr %ptr
1449 %Y = zext <2 x i32> %X to <2 x i64>
1450 ret <2 x i64> %Y
1451}
1452
; Load <4 x i32> and zext to <4 x i64>.
; SSE2 splits into unpcklps/unpckhps with zero; AVX2/AVX512 use one ymm vpmovzxdq from memory.
1453define <4 x i64> @load_zext_4i32_to_4i64(ptr%ptr) {
1454; SSE2-LABEL: load_zext_4i32_to_4i64:
1455; SSE2:       # %bb.0: # %entry
1456; SSE2-NEXT:    movaps (%rdi), %xmm1
1457; SSE2-NEXT:    xorps %xmm2, %xmm2
1458; SSE2-NEXT:    movaps %xmm1, %xmm0
1459; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1460; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1461; SSE2-NEXT:    retq
1462;
1463; SSSE3-LABEL: load_zext_4i32_to_4i64:
1464; SSSE3:       # %bb.0: # %entry
1465; SSSE3-NEXT:    movaps (%rdi), %xmm1
1466; SSSE3-NEXT:    xorps %xmm2, %xmm2
1467; SSSE3-NEXT:    movaps %xmm1, %xmm0
1468; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1469; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1470; SSSE3-NEXT:    retq
1471;
1472; SSE41-LABEL: load_zext_4i32_to_4i64:
1473; SSE41:       # %bb.0: # %entry
1474; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1475; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1476; SSE41-NEXT:    retq
1477;
1478; AVX1-LABEL: load_zext_4i32_to_4i64:
1479; AVX1:       # %bb.0: # %entry
1480; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1481; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1482; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1483; AVX1-NEXT:    retq
1484;
1485; AVX2-LABEL: load_zext_4i32_to_4i64:
1486; AVX2:       # %bb.0: # %entry
1487; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1488; AVX2-NEXT:    retq
1489;
1490; AVX512-LABEL: load_zext_4i32_to_4i64:
1491; AVX512:       # %bb.0: # %entry
1492; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1493; AVX512-NEXT:    retq
1494entry:
1495 %X = load <4 x i32>, ptr %ptr
1496 %Y = zext <4 x i32> %X to <4 x i64>
1497 ret <4 x i64> %Y
1498}
1499
; Zext a <8 x i8> register argument (arriving in xmm0) to <8 x i32> — no load involved.
; AVX2/AVX512 handle it with a single ymm vpmovzxbd of xmm0.
1500define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1501; SSE2-LABEL: zext_8i8_to_8i32:
1502; SSE2:       # %bb.0: # %entry
1503; SSE2-NEXT:    movdqa %xmm0, %xmm1
1504; SSE2-NEXT:    pxor %xmm2, %xmm2
1505; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1506; SSE2-NEXT:    movdqa %xmm1, %xmm0
1507; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1508; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1509; SSE2-NEXT:    retq
1510;
1511; SSSE3-LABEL: zext_8i8_to_8i32:
1512; SSSE3:       # %bb.0: # %entry
1513; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1514; SSSE3-NEXT:    pxor %xmm2, %xmm2
1515; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1516; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1517; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1518; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1519; SSSE3-NEXT:    retq
1520;
1521; SSE41-LABEL: zext_8i8_to_8i32:
1522; SSE41:       # %bb.0: # %entry
1523; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1524; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1525; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1526; SSE41-NEXT:    movdqa %xmm2, %xmm0
1527; SSE41-NEXT:    retq
1528;
1529; AVX1-LABEL: zext_8i8_to_8i32:
1530; AVX1:       # %bb.0: # %entry
1531; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1532; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1533; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1534; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1535; AVX1-NEXT:    retq
1536;
1537; AVX2-LABEL: zext_8i8_to_8i32:
1538; AVX2:       # %bb.0: # %entry
1539; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1540; AVX2-NEXT:    retq
1541;
1542; AVX512-LABEL: zext_8i8_to_8i32:
1543; AVX512:       # %bb.0: # %entry
1544; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1545; AVX512-NEXT:    retq
1546entry:
1547  %t = zext <8 x i8> %z to <8 x i32>
1548  ret <8 x i32> %t
1549}
1550
; A zext expressed as a shufflevector interleaving with zero elements plus a bitcast:
; codegen should recognize <i16,0,i16,0,...> as zext <8 x i16> -> <8 x i32> (vpmovzxwd on AVX2+).
1551define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1552; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1553; SSE2:       # %bb.0: # %entry
1554; SSE2-NEXT:    movdqa %xmm0, %xmm1
1555; SSE2-NEXT:    pxor %xmm2, %xmm2
1556; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1557; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1558; SSE2-NEXT:    retq
1559;
1560; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1561; SSSE3:       # %bb.0: # %entry
1562; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1563; SSSE3-NEXT:    pxor %xmm2, %xmm2
1564; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1565; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1566; SSSE3-NEXT:    retq
1567;
1568; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1569; SSE41:       # %bb.0: # %entry
1570; SSE41-NEXT:    movdqa %xmm0, %xmm1
1571; SSE41-NEXT:    pxor %xmm2, %xmm2
1572; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1573; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1574; SSE41-NEXT:    retq
1575;
1576; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1577; AVX1:       # %bb.0: # %entry
1578; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1579; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1580; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1581; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1582; AVX1-NEXT:    retq
1583;
1584; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1585; AVX2:       # %bb.0: # %entry
1586; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1587; AVX2-NEXT:    retq
1588;
1589; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1590; AVX512:       # %bb.0: # %entry
1591; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1592; AVX512-NEXT:    retq
1593entry:
1594  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1595  %Z = bitcast <16 x i16> %B to <8 x i32>
1596  ret <8 x i32> %Z
1597}
1598
; Shuffle-with-zero + bitcast form of zext <4 x i32> -> <4 x i64>:
; codegen should lower it as punpckl/hdq with zero (SSE) or vpmovzxdq (AVX2+).
1599define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1600; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1601; SSE2:       # %bb.0: # %entry
1602; SSE2-NEXT:    movaps %xmm0, %xmm1
1603; SSE2-NEXT:    xorps %xmm2, %xmm2
1604; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1605; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1606; SSE2-NEXT:    retq
1607;
1608; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1609; SSSE3:       # %bb.0: # %entry
1610; SSSE3-NEXT:    movaps %xmm0, %xmm1
1611; SSSE3-NEXT:    xorps %xmm2, %xmm2
1612; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1613; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1614; SSSE3-NEXT:    retq
1615;
1616; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1617; SSE41:       # %bb.0: # %entry
1618; SSE41-NEXT:    movdqa %xmm0, %xmm1
1619; SSE41-NEXT:    pxor %xmm2, %xmm2
1620; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1621; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1622; SSE41-NEXT:    retq
1623;
1624; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1625; AVX1:       # %bb.0: # %entry
1626; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1627; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1628; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1629; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1630; AVX1-NEXT:    retq
1631;
1632; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1633; AVX2:       # %bb.0: # %entry
1634; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1635; AVX2-NEXT:    retq
1636;
1637; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1638; AVX512:       # %bb.0: # %entry
1639; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1640; AVX512-NEXT:    retq
1641entry:
1642  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1643  %Z = bitcast <8 x i32> %B to <4 x i64>
1644  ret <4 x i64> %Z
1645}
1646
; Shuffle-with-zero + bitcast form of zext <8 x i8> -> <8 x i32> (each byte padded by
; three zero bytes); should lower identically to the plain zext_8i8_to_8i32 above.
1647define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1648; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1649; SSE2:       # %bb.0: # %entry
1650; SSE2-NEXT:    movdqa %xmm0, %xmm1
1651; SSE2-NEXT:    pxor %xmm2, %xmm2
1652; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1653; SSE2-NEXT:    movdqa %xmm1, %xmm0
1654; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1655; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1656; SSE2-NEXT:    retq
1657;
1658; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1659; SSSE3:       # %bb.0: # %entry
1660; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1661; SSSE3-NEXT:    pxor %xmm2, %xmm2
1662; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1663; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1664; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1665; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1666; SSSE3-NEXT:    retq
1667;
1668; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1669; SSE41:       # %bb.0: # %entry
1670; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1671; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1672; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1673; SSE41-NEXT:    movdqa %xmm2, %xmm0
1674; SSE41-NEXT:    retq
1675;
1676; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1677; AVX1:       # %bb.0: # %entry
1678; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1679; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1680; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1681; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1682; AVX1-NEXT:    retq
1683;
1684; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1685; AVX2:       # %bb.0: # %entry
1686; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1687; AVX2-NEXT:    retq
1688;
1689; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1690; AVX512:       # %bb.0: # %entry
1691; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1692; AVX512-NEXT:    retq
1693entry:
1694  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1695  %Z = bitcast <32 x i8> %B to <8 x i32>
1696  ret <8 x i32> %Z
1697}
1698
; Zero-extending shuffle starting at byte offset 6 (bytes 6 and 7 -> two i64 lanes).
; Fast-shuffle targets (AVX2-FAST, AVX512BW) use one vpshufb; others shift right by 48
; bits first and then pmovzxbq.
1699define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1700; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1701; SSE2:       # %bb.0: # %entry
1702; SSE2-NEXT:    pxor %xmm1, %xmm1
1703; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1704; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1705; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1706; SSE2-NEXT:    retq
1707;
1708; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1709; SSSE3:       # %bb.0: # %entry
1710; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1711; SSSE3-NEXT:    retq
1712;
1713; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1714; SSE41:       # %bb.0: # %entry
1715; SSE41-NEXT:    psrlq $48, %xmm0
1716; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1717; SSE41-NEXT:    retq
1718;
1719; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
1720; AVX1:       # %bb.0: # %entry
1721; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
1722; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1723; AVX1-NEXT:    retq
1724;
1725; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1726; AVX2-SLOW:       # %bb.0: # %entry
1727; AVX2-SLOW-NEXT:    vpsrlq $48, %xmm0, %xmm0
1728; AVX2-SLOW-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1729; AVX2-SLOW-NEXT:    retq
1730;
1731; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
1732; AVX2-FAST:       # %bb.0: # %entry
1733; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1734; AVX2-FAST-NEXT:    retq
1735;
1736; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
1737; AVX512F:       # %bb.0: # %entry
1738; AVX512F-NEXT:    vpsrlq $48, %xmm0, %xmm0
1739; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1740; AVX512F-NEXT:    retq
1741;
1742; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
1743; AVX512BW:       # %bb.0: # %entry
1744; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1745; AVX512BW-NEXT:    retq
1746entry:
1747  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1748  %Z = bitcast <16 x i8> %B to <2 x i64>
1749  ret <2 x i64> %Z
1750}
1751
; Shuffle that zero-extends bytes 11..14 of %A into four i64 lanes.
; Checked lowering: AVX2/AVX512 shift the source down by 11 bytes (psrldq)
; and use a single 256-bit vpmovzxbq; 128-bit targets build the two halves
; separately (pshufb pairs on SSSE3, psrldq+pmovzxbq pairs on SSE41/AVX1).
define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlq $8, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %Z = bitcast <32 x i8> %B to <4 x i64>
  ret <4 x i64> %Z
}
1808
; Shuffle that zero-extends i16 elements 3 and 4 of %A (byte offset 6) into
; the two i64 lanes. Checked lowering: single pshufb on SSSE3/AVX2-FAST/
; AVX512BW, psrldq + pmovzxwq on the remaining targets.
define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX2-SLOW-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX2-FAST:       # %bb.0: # %entry
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
  %Z = bitcast <8 x i16> %B to <2 x i64>
  ret <2 x i64> %Z
}
1861
; Shuffle that zero-extends i16 elements 2..5 of %A into four i64 lanes.
; Checked lowering: AVX2/AVX512 use vpshufd to gather the four words then a
; single 256-bit vpmovzxwq; 128-bit targets build each half with
; pshufd+pmovzxwq (SSE41/AVX1) or punpck sequences (SSE2/SSSE3).
define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
  %Z = bitcast <16 x i16> %B to <4 x i64>
  ret <4 x i64> %Z
}
1917
; Shuffle that zero-extends i16 elements 1..4 of %A into four i32 lanes.
; Checked lowering: single pshufb on AVX2-FAST/AVX512BW; psrldq by 2 bytes
; followed by pmovzxwd (SSE41/AVX1/AVX2-SLOW/AVX512F) or punpcklwd with
; zero (SSE2/SSSE3) elsewhere.
define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX2-SLOW-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX2-FAST:       # %bb.0: # %entry
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
; AVX512BW-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
  %Z = bitcast <8 x i16> %B to <4 x i32>
  ret <4 x i32> %Z
}
1971
; Shuffle that zero-extends i16 elements 3..7 of %A into the first five i32
; lanes (the remaining three source lanes are undef in the mask). Checked
; lowering: AVX2/AVX512 shift down 6 bytes and use one 256-bit vpmovzxwd;
; 128-bit targets build the two halves with shift/unpack combinations.
define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
2025
; Shuffle that zero-extends the upper half of %A (i16 elements 8..15, with
; elements 13 and 15 undef in the mask) into eight i32 lanes. Checked
; lowering: AVX targets extract the high 128-bit half then vpmovzxwd; SSE
; targets work directly on %xmm1 (the high half of the argument).
define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
2075
; Shuffle that zero-extends i32 elements 2 and 3 of %A into the two i64
; lanes. Checked lowering: a single unpckhps against a zeroed register on
; every target (same pattern for SSE and AVX).
define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <4 x i32> %B to <2 x i64>
  ret <2 x i64> %Z
}
2093
; Shuffle that zero-extends i32 elements 2 and 3 of %A into the middle i64
; lanes (the first and last source lanes are undef in the mask). Checked
; lowering: AVX2/AVX512 use vpshufd + 256-bit vpmovzxdq; 128-bit targets
; build the low half with pand/pblendw and the high half with psrldq.
define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
2143
2144define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
2145; SSE2-LABEL: zext_32i8_to_32i32:
2146; SSE2:       # %bb.0:
2147; SSE2-NEXT:    movq %rdi, %rax
2148; SSE2-NEXT:    pxor %xmm2, %xmm2
2149; SSE2-NEXT:    movdqa %xmm0, %xmm3
2150; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2151; SSE2-NEXT:    movdqa %xmm3, %xmm4
2152; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2153; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2154; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2155; SSE2-NEXT:    movdqa %xmm0, %xmm5
2156; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2157; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2158; SSE2-NEXT:    movdqa %xmm1, %xmm6
2159; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2160; SSE2-NEXT:    movdqa %xmm6, %xmm7
2161; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2162; SSE2-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2163; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2164; SSE2-NEXT:    movdqa %xmm1, %xmm8
2165; SSE2-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2166; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2167; SSE2-NEXT:    movdqa %xmm1, 112(%rdi)
2168; SSE2-NEXT:    movdqa %xmm8, 96(%rdi)
2169; SSE2-NEXT:    movdqa %xmm6, 80(%rdi)
2170; SSE2-NEXT:    movdqa %xmm7, 64(%rdi)
2171; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
2172; SSE2-NEXT:    movdqa %xmm5, 32(%rdi)
2173; SSE2-NEXT:    movdqa %xmm3, 16(%rdi)
2174; SSE2-NEXT:    movdqa %xmm4, (%rdi)
2175; SSE2-NEXT:    retq
2176;
2177; SSSE3-LABEL: zext_32i8_to_32i32:
2178; SSSE3:       # %bb.0:
2179; SSSE3-NEXT:    movq %rdi, %rax
2180; SSSE3-NEXT:    pxor %xmm2, %xmm2
2181; SSSE3-NEXT:    movdqa %xmm0, %xmm3
2182; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2183; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2184; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2185; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2186; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
2187; SSSE3-NEXT:    movdqa %xmm0, %xmm5
2188; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
2189; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2190; SSSE3-NEXT:    movdqa %xmm1, %xmm6
2191; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2192; SSSE3-NEXT:    movdqa %xmm6, %xmm7
2193; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
2194; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
2195; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
2196; SSSE3-NEXT:    movdqa %xmm1, %xmm8
2197; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
2198; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2199; SSSE3-NEXT:    movdqa %xmm1, 112(%rdi)
2200; SSSE3-NEXT:    movdqa %xmm8, 96(%rdi)
2201; SSSE3-NEXT:    movdqa %xmm6, 80(%rdi)
2202; SSSE3-NEXT:    movdqa %xmm7, 64(%rdi)
2203; SSSE3-NEXT:    movdqa %xmm0, 48(%rdi)
2204; SSSE3-NEXT:    movdqa %xmm5, 32(%rdi)
2205; SSSE3-NEXT:    movdqa %xmm3, 16(%rdi)
2206; SSSE3-NEXT:    movdqa %xmm4, (%rdi)
2207; SSSE3-NEXT:    retq
2208;
2209; SSE41-LABEL: zext_32i8_to_32i32:
2210; SSE41:       # %bb.0:
2211; SSE41-NEXT:    movq %rdi, %rax
2212; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2213; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1]
2214; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2215; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
2216; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
2217; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2218; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2219; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2220; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,1,1]
2221; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2222; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
2223; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2224; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2225; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2226; SSE41-NEXT:    movdqa %xmm1, 112(%rdi)
2227; SSE41-NEXT:    movdqa %xmm7, 96(%rdi)
2228; SSE41-NEXT:    movdqa %xmm6, 80(%rdi)
2229; SSE41-NEXT:    movdqa %xmm5, 64(%rdi)
2230; SSE41-NEXT:    movdqa %xmm0, 48(%rdi)
2231; SSE41-NEXT:    movdqa %xmm4, 32(%rdi)
2232; SSE41-NEXT:    movdqa %xmm3, 16(%rdi)
2233; SSE41-NEXT:    movdqa %xmm2, (%rdi)
2234; SSE41-NEXT:    retq
2235;
2236; AVX1-LABEL: zext_32i8_to_32i32:
2237; AVX1:       # %bb.0:
2238; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2239; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
2240; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2241; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
2242; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2243; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2244; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
2245; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2246; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
2247; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2248; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2249; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
2250; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2251; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
2252; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2253; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2254; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2255; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2256; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
2257; AVX1-NEXT:    vmovaps %ymm4, %ymm0
2258; AVX1-NEXT:    retq
2259;
2260; AVX2-LABEL: zext_32i8_to_32i32:
2261; AVX2:       # %bb.0:
2262; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2263; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
2264; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
2265; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2266; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2267; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2268; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2269; AVX2-NEXT:    vmovdqa %ymm4, %ymm0
2270; AVX2-NEXT:    retq
2271;
2272; AVX512-LABEL: zext_32i8_to_32i32:
2273; AVX512:       # %bb.0:
2274; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2275; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
2276; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2277; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
2278; AVX512-NEXT:    retq
2279  %res = zext <32 x i8>%x to <32 x i32>
2280  ret <32 x i32> %res
2281}
2282
; Sub-dword vector load + zext: loads <2 x i8> (2 bytes) from %addr, zero
; extends to <2 x i32>, then doubles via %y + %y. All targets are expected
; to load the pair of bytes as a single scalar movzwl, movd it into an xmm
; register, and widen in-register (punpck on SSE2/SSSE3, pmovzxbd on
; SSE4.1/AVX) — no per-element loads.
2283define <2 x i32> @zext_2i8_to_2i32(ptr %addr) {
2284; SSE2-LABEL: zext_2i8_to_2i32:
2285; SSE2:       # %bb.0:
2286; SSE2-NEXT:    movzwl (%rdi), %eax
2287; SSE2-NEXT:    movd %eax, %xmm0
2288; SSE2-NEXT:    pxor %xmm1, %xmm1
2289; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2290; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2291; SSE2-NEXT:    paddd %xmm0, %xmm0
2292; SSE2-NEXT:    retq
2293;
2294; SSSE3-LABEL: zext_2i8_to_2i32:
2295; SSSE3:       # %bb.0:
2296; SSSE3-NEXT:    movzwl (%rdi), %eax
2297; SSSE3-NEXT:    movd %eax, %xmm0
2298; SSSE3-NEXT:    pxor %xmm1, %xmm1
2299; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2300; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2301; SSSE3-NEXT:    paddd %xmm0, %xmm0
2302; SSSE3-NEXT:    retq
2303;
2304; SSE41-LABEL: zext_2i8_to_2i32:
2305; SSE41:       # %bb.0:
2306; SSE41-NEXT:    movzwl (%rdi), %eax
2307; SSE41-NEXT:    movd %eax, %xmm0
2308; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2309; SSE41-NEXT:    paddd %xmm0, %xmm0
2310; SSE41-NEXT:    retq
2311;
2312; AVX-LABEL: zext_2i8_to_2i32:
2313; AVX:       # %bb.0:
2314; AVX-NEXT:    movzwl (%rdi), %eax
2315; AVX-NEXT:    vmovd %eax, %xmm0
2316; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2317; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
2318; AVX-NEXT:    retq
2319  %x = load <2 x i8>, ptr %addr, align 1
2320  %y = zext <2 x i8> %x to <2 x i32>
2321  %z = add <2 x i32>%y, %y
2322  ret <2 x i32>%z
2323}
2324
; Non-byte-sized element type: loads <4 x i17> (four 17-bit fields packed
; into 68 bits, so a 64-bit load plus a 32-bit load of the tail) and zero
; extends to <4 x i32>. Lanes are unpacked with scalar shifts (shrq $17 /
; $34 / $51, with the top lane or'ed together from both loads), inserted
; into a vector, then masked with 131071 (0x1FFFF = (1<<17)-1) to clear the
; bits above i17. AVX2/AVX512 materialize the mask via vpbroadcastd instead
; of a full constant-pool vector.
2325define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
2326; SSE2-LABEL: zext_4i17_to_4i32:
2327; SSE2:       # %bb.0:
2328; SSE2-NEXT:    movq (%rdi), %rax
2329; SSE2-NEXT:    movd %eax, %xmm0
2330; SSE2-NEXT:    movq %rax, %rcx
2331; SSE2-NEXT:    shrq $17, %rcx
2332; SSE2-NEXT:    movd %ecx, %xmm1
2333; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2334; SSE2-NEXT:    movl 8(%rdi), %ecx
2335; SSE2-NEXT:    shll $13, %ecx
2336; SSE2-NEXT:    movq %rax, %rdx
2337; SSE2-NEXT:    shrq $51, %rdx
2338; SSE2-NEXT:    orl %ecx, %edx
2339; SSE2-NEXT:    movd %edx, %xmm1
2340; SSE2-NEXT:    shrq $34, %rax
2341; SSE2-NEXT:    movd %eax, %xmm2
2342; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2343; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2344; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2345; SSE2-NEXT:    retq
2346;
2347; SSSE3-LABEL: zext_4i17_to_4i32:
2348; SSSE3:       # %bb.0:
2349; SSSE3-NEXT:    movq (%rdi), %rax
2350; SSSE3-NEXT:    movd %eax, %xmm0
2351; SSSE3-NEXT:    movq %rax, %rcx
2352; SSSE3-NEXT:    shrq $17, %rcx
2353; SSSE3-NEXT:    movd %ecx, %xmm1
2354; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2355; SSSE3-NEXT:    movl 8(%rdi), %ecx
2356; SSSE3-NEXT:    shll $13, %ecx
2357; SSSE3-NEXT:    movq %rax, %rdx
2358; SSSE3-NEXT:    shrq $51, %rdx
2359; SSSE3-NEXT:    orl %ecx, %edx
2360; SSSE3-NEXT:    movd %edx, %xmm1
2361; SSSE3-NEXT:    shrq $34, %rax
2362; SSSE3-NEXT:    movd %eax, %xmm2
2363; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2364; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2365; SSSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2366; SSSE3-NEXT:    retq
2367;
2368; SSE41-LABEL: zext_4i17_to_4i32:
2369; SSE41:       # %bb.0:
2370; SSE41-NEXT:    movl 8(%rdi), %eax
2371; SSE41-NEXT:    shll $13, %eax
2372; SSE41-NEXT:    movq (%rdi), %rcx
2373; SSE41-NEXT:    movq %rcx, %rdx
2374; SSE41-NEXT:    shrq $51, %rdx
2375; SSE41-NEXT:    orl %eax, %edx
2376; SSE41-NEXT:    movq %rcx, %rax
2377; SSE41-NEXT:    shrq $17, %rax
2378; SSE41-NEXT:    movd %ecx, %xmm0
2379; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
2380; SSE41-NEXT:    shrq $34, %rcx
2381; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
2382; SSE41-NEXT:    pinsrd $3, %edx, %xmm0
2383; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2384; SSE41-NEXT:    retq
2385;
2386; AVX1-LABEL: zext_4i17_to_4i32:
2387; AVX1:       # %bb.0:
2388; AVX1-NEXT:    movl 8(%rdi), %eax
2389; AVX1-NEXT:    shll $13, %eax
2390; AVX1-NEXT:    movq (%rdi), %rcx
2391; AVX1-NEXT:    movq %rcx, %rdx
2392; AVX1-NEXT:    shrq $51, %rdx
2393; AVX1-NEXT:    orl %eax, %edx
2394; AVX1-NEXT:    movq %rcx, %rax
2395; AVX1-NEXT:    shrq $17, %rax
2396; AVX1-NEXT:    vmovd %ecx, %xmm0
2397; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2398; AVX1-NEXT:    shrq $34, %rcx
2399; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2400; AVX1-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2401; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2402; AVX1-NEXT:    retq
2403;
2404; AVX2-LABEL: zext_4i17_to_4i32:
2405; AVX2:       # %bb.0:
2406; AVX2-NEXT:    movl 8(%rdi), %eax
2407; AVX2-NEXT:    shll $13, %eax
2408; AVX2-NEXT:    movq (%rdi), %rcx
2409; AVX2-NEXT:    movq %rcx, %rdx
2410; AVX2-NEXT:    shrq $51, %rdx
2411; AVX2-NEXT:    orl %eax, %edx
2412; AVX2-NEXT:    movq %rcx, %rax
2413; AVX2-NEXT:    shrq $17, %rax
2414; AVX2-NEXT:    vmovd %ecx, %xmm0
2415; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2416; AVX2-NEXT:    shrq $34, %rcx
2417; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2418; AVX2-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2419; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2420; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2421; AVX2-NEXT:    retq
2422;
2423; AVX512-LABEL: zext_4i17_to_4i32:
2424; AVX512:       # %bb.0:
2425; AVX512-NEXT:    movl 8(%rdi), %eax
2426; AVX512-NEXT:    shll $13, %eax
2427; AVX512-NEXT:    movq (%rdi), %rcx
2428; AVX512-NEXT:    movq %rcx, %rdx
2429; AVX512-NEXT:    shrq $51, %rdx
2430; AVX512-NEXT:    orl %eax, %edx
2431; AVX512-NEXT:    movq %rcx, %rax
2432; AVX512-NEXT:    shrq $17, %rax
2433; AVX512-NEXT:    vmovd %ecx, %xmm0
2434; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
2435; AVX512-NEXT:    shrq $34, %rcx
2436; AVX512-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
2437; AVX512-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
2438; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
2439; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
2440; AVX512-NEXT:    retq
2441  %a = load <4 x i17>, ptr %ptr
2442  %b = zext <4 x i17> %a to <4 x i32>
2443  ret <4 x i32> %b
2444}
2445
; Illegal narrow element type through arithmetic: truncates %x to i6, splats
; it across <8 x i6>, adds the constant vector <0..7>, then zero-extends to
; <8 x i64>. Codegen performs the splat+add in i16 lanes (pshuflw/pshufd or
; vpbroadcastw, then paddw/vpaddw) and must mask with 63 ((1<<6)-1) to
; clear the bits above the i6 range before/after widening to i64 lanes.
; AVX512 widens first with vpmovzxwq to zmm and masks with a broadcast
; {1to8} vpandq.
2446define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
2447; SSE2-LABEL: zext_8i6_to_8i64:
2448; SSE2:       # %bb.0: # %entry
2449; SSE2-NEXT:    movd %edi, %xmm0
2450; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2451; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2452; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2453; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2454; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2455; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [63,63]
2456; SSE2-NEXT:    pand %xmm4, %xmm0
2457; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2458; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2459; SSE2-NEXT:    pand %xmm4, %xmm1
2460; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2461; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2462; SSE2-NEXT:    pand %xmm4, %xmm2
2463; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2464; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2465; SSE2-NEXT:    pand %xmm4, %xmm3
2466; SSE2-NEXT:    retq
2467;
2468; SSSE3-LABEL: zext_8i6_to_8i64:
2469; SSSE3:       # %bb.0: # %entry
2470; SSSE3-NEXT:    movd %edi, %xmm0
2471; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2472; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2473; SSSE3-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2474; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
2475; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2476; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [63,63]
2477; SSSE3-NEXT:    pand %xmm4, %xmm0
2478; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2479; SSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
2480; SSSE3-NEXT:    pand %xmm4, %xmm1
2481; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
2482; SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
2483; SSSE3-NEXT:    pand %xmm4, %xmm2
2484; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2485; SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
2486; SSSE3-NEXT:    pand %xmm4, %xmm3
2487; SSSE3-NEXT:    retq
2488;
2489; SSE41-LABEL: zext_8i6_to_8i64:
2490; SSE41:       # %bb.0: # %entry
2491; SSE41-NEXT:    movd %edi, %xmm0
2492; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2493; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
2494; SSE41-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2495; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2496; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm4 = [63,63]
2497; SSE41-NEXT:    pand %xmm4, %xmm0
2498; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
2499; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2500; SSE41-NEXT:    pand %xmm4, %xmm1
2501; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
2502; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2503; SSE41-NEXT:    pand %xmm4, %xmm2
2504; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
2505; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
2506; SSE41-NEXT:    pand %xmm4, %xmm3
2507; SSE41-NEXT:    retq
2508;
2509; AVX1-LABEL: zext_8i6_to_8i64:
2510; AVX1:       # %bb.0: # %entry
2511; AVX1-NEXT:    vmovd %edi, %xmm0
2512; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2513; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2514; AVX1-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2515; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2516; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2517; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
2518; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2519; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2520; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
2521; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
2522; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
2523; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
2524; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
2525; AVX1-NEXT:    retq
2526;
2527; AVX2-LABEL: zext_8i6_to_8i64:
2528; AVX2:       # %bb.0: # %entry
2529; AVX2-NEXT:    vmovd %edi, %xmm0
2530; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2531; AVX2-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2532; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2533; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2534; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2535; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
2536; AVX2-NEXT:    retq
2537;
2538; AVX512-LABEL: zext_8i6_to_8i64:
2539; AVX512:       # %bb.0: # %entry
2540; AVX512-NEXT:    vmovd %edi, %xmm0
2541; AVX512-NEXT:    vpbroadcastw %xmm0, %xmm0
2542; AVX512-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2543; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2544; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
2545; AVX512-NEXT:    retq
2546entry:
2547  %a = trunc i32 %x to i6
2548  %b = insertelement <8 x i6> undef, i6 %a, i32 0
2549  %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
2550  %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
2551  %e = zext <8 x i6> %d to <8 x i64>
2552  ret <8 x i64> %e
2553}
2554
; Splat shuffle feeding a zext: broadcasts lane 0 of <4 x i32>
; (zeroinitializer mask) then zero-extends to <4 x i64>. The CHECK lines
; verify the shuffle and zext combine: SSE2/SSSE3 use pshufd + punpckldq
; with zero, SSE4.1 folds into pmovzxdq, and AVX2/AVX512 use a single
; vpbroadcastd + vpmovzxdq pair producing the full ymm result.
2555define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
2556; SSE2-LABEL: splatshuf_zext_v4i64:
2557; SSE2:       # %bb.0:
2558; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2559; SSE2-NEXT:    pxor %xmm1, %xmm1
2560; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2561; SSE2-NEXT:    movdqa %xmm0, %xmm1
2562; SSE2-NEXT:    retq
2563;
2564; SSSE3-LABEL: splatshuf_zext_v4i64:
2565; SSSE3:       # %bb.0:
2566; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2567; SSSE3-NEXT:    pxor %xmm1, %xmm1
2568; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2569; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2570; SSSE3-NEXT:    retq
2571;
2572; SSE41-LABEL: splatshuf_zext_v4i64:
2573; SSE41:       # %bb.0:
2574; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2575; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2576; SSE41-NEXT:    movdqa %xmm0, %xmm1
2577; SSE41-NEXT:    retq
2578;
2579; AVX1-LABEL: splatshuf_zext_v4i64:
2580; AVX1:       # %bb.0:
2581; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2582; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
2583; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2584; AVX1-NEXT:    retq
2585;
2586; AVX2-LABEL: splatshuf_zext_v4i64:
2587; AVX2:       # %bb.0:
2588; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
2589; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2590; AVX2-NEXT:    retq
2591;
2592; AVX512-LABEL: splatshuf_zext_v4i64:
2593; AVX512:       # %bb.0:
2594; AVX512-NEXT:    vpbroadcastd %xmm0, %xmm0
2595; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2596; AVX512-NEXT:    retq
2597  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
2598  %ext = zext <4 x i32> %shuf to <4 x i64>
2599  ret <4 x i64> %ext
2600}
2601
; Shuffle + zext where the mask <0,undef,3,7,0,undef,3,7> repeats the same
; pattern (including the undef positions) in both halves. Because the undefs
; match, SSSE3+ can emit a single pshufb that interleaves the selected words
; with zeros (the SSSE3 variant even keeps an 'u,u' don't-care slot), and
; AVX2/AVX512 combine into pshufb + vpmovzxwd on the full ymm.
2602define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
2603; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2604; SSE2:       # %bb.0:
2605; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2606; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
2607; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2608; SSE2-NEXT:    movdqa %xmm0, %xmm1
2609; SSE2-NEXT:    retq
2610;
2611; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
2612; SSSE3:       # %bb.0:
2613; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2614; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2615; SSSE3-NEXT:    retq
2616;
2617; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
2618; SSE41:       # %bb.0:
2619; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2620; SSE41-NEXT:    movdqa %xmm0, %xmm1
2621; SSE41-NEXT:    retq
2622;
2623; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
2624; AVX1:       # %bb.0:
2625; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2626; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2627; AVX1-NEXT:    retq
2628;
2629; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs:
2630; AVX2:       # %bb.0:
2631; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2632; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2633; AVX2-NEXT:    retq
2634;
2635; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs:
2636; AVX512:       # %bb.0:
2637; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
2638; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2639; AVX512-NEXT:    retq
2640  %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2641  %ext = zext <8 x i16> %shuf to <8 x i32>
2642  ret <8 x i32> %ext
2643}
2644
; Companion to splatshuf_zext_v8i32_matching_undefs: mask
; <0,1,3,7,0,undef,3,7> has an undef only in the second half, so the two
; halves do NOT repeat the same pattern. The CHECK lines verify the
; shuffle+zext still combines (single pshufb with zeroed lanes on
; SSSE3/SSE41; AVX1 needs an extra punpckhwd for the upper half; AVX2+
; use pshufb + vpmovzxwd on ymm).
2645define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
2646; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2647; SSE2:       # %bb.0:
2648; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2649; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
2650; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2651; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
2652; SSE2-NEXT:    pxor %xmm1, %xmm1
2653; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2654; SSE2-NEXT:    movdqa %xmm0, %xmm1
2655; SSE2-NEXT:    retq
2656;
2657; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2658; SSSE3:       # %bb.0:
2659; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2660; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2661; SSSE3-NEXT:    retq
2662;
2663; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2664; SSE41:       # %bb.0:
2665; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
2666; SSE41-NEXT:    movdqa %xmm0, %xmm1
2667; SSE41-NEXT:    retq
2668;
2669; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2670; AVX1:       # %bb.0:
2671; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2672; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2673; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2674; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2675; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2676; AVX1-NEXT:    retq
2677;
2678; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2679; AVX2:       # %bb.0:
2680; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2681; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2682; AVX2-NEXT:    retq
2683;
2684; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef:
2685; AVX512:       # %bb.0:
2686; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
2687; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2688; AVX512-NEXT:    retq
2689  %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7>
2690  %ext = zext <8 x i16> %shuf to <8 x i32>
2691  ret <8 x i32> %ext
2692}
2693
; Byte-splat feeding a zext: broadcasts element 14 of <16 x i8> (all mask
; lanes are 14) then zero-extends to <16 x i16>. SSSE3/SSE41/AVX1 fold the
; splat and the zero-extension into one pshufb that places byte 14 in every
; even byte and zero in every odd byte; AVX2/AVX512 splat with pshufb then
; widen with a single vpmovzxbw to ymm.
2694define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) {
2695; SSE2-LABEL: splatshuf_zext_v16i16:
2696; SSE2:       # %bb.0:
2697; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2698; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
2699; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
2700; SSE2-NEXT:    pxor %xmm1, %xmm1
2701; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2702; SSE2-NEXT:    movdqa %xmm0, %xmm1
2703; SSE2-NEXT:    retq
2704;
2705; SSSE3-LABEL: splatshuf_zext_v16i16:
2706; SSSE3:       # %bb.0:
2707; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2708; SSSE3-NEXT:    movdqa %xmm0, %xmm1
2709; SSSE3-NEXT:    retq
2710;
2711; SSE41-LABEL: splatshuf_zext_v16i16:
2712; SSE41:       # %bb.0:
2713; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2714; SSE41-NEXT:    movdqa %xmm0, %xmm1
2715; SSE41-NEXT:    retq
2716;
2717; AVX1-LABEL: splatshuf_zext_v16i16:
2718; AVX1:       # %bb.0:
2719; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero
2720; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2721; AVX1-NEXT:    retq
2722;
2723; AVX2-LABEL: splatshuf_zext_v16i16:
2724; AVX2:       # %bb.0:
2725; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2726; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2727; AVX2-NEXT:    retq
2728;
2729; AVX512-LABEL: splatshuf_zext_v16i16:
2730; AVX512:       # %bb.0:
2731; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14]
2732; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2733; AVX512-NEXT:    retq
2734  %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
2735  %ext = zext <16 x i8> %shuf to <16 x i16>
2736  ret <16 x i16> %ext
2737}
2738