xref: /llvm-project/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll (revision 2a18162daa96e1cca5144a634bb03134405b52cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ALL,SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLBW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLBW
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLVBMI
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLVBMI
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,XOP,XOPAVX1
14; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,XOP,XOPAVX2
15
; Byte broadcast: the shuffle mask is sixteen zeros, so every one of the 16
; result lanes is byte 0 of %a. No mask index is >= 16, so %b is never selected.
16define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
17; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
18; SSE2:       # %bb.0:
19; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
20; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
21; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
22; SSE2-NEXT:    retq
23;
24; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
25; SSSE3:       # %bb.0:
26; SSSE3-NEXT:    pxor %xmm1, %xmm1
27; SSSE3-NEXT:    pshufb %xmm1, %xmm0
28; SSSE3-NEXT:    retq
29;
30; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
31; SSE41:       # %bb.0:
32; SSE41-NEXT:    pxor %xmm1, %xmm1
33; SSE41-NEXT:    pshufb %xmm1, %xmm0
34; SSE41-NEXT:    retq
35;
36; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
37; AVX1:       # %bb.0:
38; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
39; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
40; AVX1-NEXT:    retq
41;
42; AVX2OR512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
43; AVX2OR512VL:       # %bb.0:
44; AVX2OR512VL-NEXT:    vpbroadcastb %xmm0, %xmm0
45; AVX2OR512VL-NEXT:    retq
46;
47; XOPAVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
48; XOPAVX1:       # %bb.0:
49; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
50; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
51; XOPAVX1-NEXT:    retq
52;
53; XOPAVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
54; XOPAVX2:       # %bb.0:
55; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
56; XOPAVX2-NEXT:    retq
57  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
58  ret <16 x i8> %shuffle
59}
60
; Result lanes 0-7 all take byte 0 of %a and lanes 8-15 all take byte 1 of %a.
; No mask index is >= 16, so %b is never selected.
61define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
62; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
63; SSE2:       # %bb.0:
64; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
65; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
66; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
67; SSE2-NEXT:    retq
68;
69; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
70; SSSE3:       # %bb.0:
71; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
72; SSSE3-NEXT:    retq
73;
74; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
75; SSE41:       # %bb.0:
76; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
77; SSE41-NEXT:    retq
78;
79; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
80; AVX:       # %bb.0:
81; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
82; AVX-NEXT:    retq
83  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
84  ret <16 x i8> %shuffle
85}
86
; Result lanes 0-7 all take byte 0 of %a and lanes 8-15 all take byte 8 of %a,
; i.e. the first byte of each 8-byte half is replicated across that half.
; No mask index is >= 16, so %b is never selected.
87define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
88; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
89; SSE2:       # %bb.0:
90; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
91; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
92; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
93; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
94; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
95; SSE2-NEXT:    retq
96;
97; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
98; SSSE3:       # %bb.0:
99; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
100; SSSE3-NEXT:    retq
101;
102; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
103; SSE41:       # %bb.0:
104; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
105; SSE41-NEXT:    retq
106;
107; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
108; AVX:       # %bb.0:
109; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
110; AVX-NEXT:    retq
111  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
112  ret <16 x i8> %shuffle
113}
114
; Each of bytes 0, 1, 2 and 3 of %a is repeated four times, in that order,
; to fill the 16 result lanes. No mask index is >= 16, so %b is never selected.
115define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
116; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
117; SSE:       # %bb.0:
118; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
119; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
120; SSE-NEXT:    retq
121;
122; AVX1-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
123; AVX1:       # %bb.0:
124; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
125; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
126; AVX1-NEXT:    retq
127;
128; AVX2-SLOW-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
129; AVX2-SLOW:       # %bb.0:
130; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
131; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
132; AVX2-SLOW-NEXT:    retq
133;
134; AVX2-FAST-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
135; AVX2-FAST:       # %bb.0:
136; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
137; AVX2-FAST-NEXT:    retq
138;
139; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
140; AVX512VL:       # %bb.0:
141; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
142; AVX512VL-NEXT:    retq
143;
144; XOP-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
145; XOP:       # %bb.0:
146; XOP-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
147; XOP-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
148; XOP-NEXT:    retq
149  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
150  ret <16 x i8> %shuffle
151}
152
; Each of bytes 4, 5, 6 and 7 of %a is repeated four times, in that order,
; to fill the 16 result lanes. No mask index is >= 16, so %b is never selected.
153define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
154; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
155; SSE:       # %bb.0:
156; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
157; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
158; SSE-NEXT:    retq
159;
160; AVX1-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
161; AVX1:       # %bb.0:
162; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
163; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
164; AVX1-NEXT:    retq
165;
166; AVX2-SLOW-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
167; AVX2-SLOW:       # %bb.0:
168; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
169; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
170; AVX2-SLOW-NEXT:    retq
171;
172; AVX2-FAST-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
173; AVX2-FAST:       # %bb.0:
174; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
175; AVX2-FAST-NEXT:    retq
176;
177; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
178; AVX512VL:       # %bb.0:
179; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
180; AVX512VL-NEXT:    retq
181;
182; XOP-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
183; XOP:       # %bb.0:
184; XOP-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
185; XOP-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
186; XOP-NEXT:    retq
187  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
188  ret <16 x i8> %shuffle
189}
190
; Bytes 0, 4, 8 and 12 of %a are each repeated four times, so every 4-byte
; group of the result is filled with the first byte of the matching group of %a.
; No mask index is >= 16, so %b is never selected.
191define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
192; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
193; SSE2:       # %bb.0:
194; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
195; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
196; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
197; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
198; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
199; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
200; SSE2-NEXT:    retq
201;
202; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
203; SSSE3:       # %bb.0:
204; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
205; SSSE3-NEXT:    retq
206;
207; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
208; SSE41:       # %bb.0:
209; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
210; SSE41-NEXT:    retq
211;
212; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
213; AVX:       # %bb.0:
214; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
215; AVX-NEXT:    retq
216  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
217  ret <16 x i8> %shuffle
218}
219
; Each of bytes 0..7 of %a is duplicated into two adjacent result lanes, which
; is the low half of %a interleaved with itself.
; No mask index is >= 16, so %b is never selected.
220define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
221; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
222; SSE:       # %bb.0:
223; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
224; SSE-NEXT:    retq
225;
226; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
227; AVX:       # %bb.0:
228; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
229; AVX-NEXT:    retq
230  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
231  ret <16 x i8> %shuffle
232}
233
; The byte pair (%a[0], %a[1]) is repeated eight times, so the first two bytes
; of %a are replicated as a unit across the whole result.
; No mask index is >= 16, so %b is never selected.
234define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
235; SSE-LABEL: shuffle_v16i8_0101010101010101:
236; SSE:       # %bb.0:
237; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
238; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
239; SSE-NEXT:    retq
240;
241; AVX1-LABEL: shuffle_v16i8_0101010101010101:
242; AVX1:       # %bb.0:
243; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
244; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
245; AVX1-NEXT:    retq
246;
247; AVX2OR512VL-LABEL: shuffle_v16i8_0101010101010101:
248; AVX2OR512VL:       # %bb.0:
249; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
250; AVX2OR512VL-NEXT:    retq
251;
252; XOPAVX1-LABEL: shuffle_v16i8_0101010101010101:
253; XOPAVX1:       # %bb.0:
254; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
255; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
256; XOPAVX1-NEXT:    retq
257;
258; XOPAVX2-LABEL: shuffle_v16i8_0101010101010101:
259; XOPAVX2:       # %bb.0:
260; XOPAVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
261; XOPAVX2-NEXT:    retq
262  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
263  ret <16 x i8> %shuffle
264}
265
; Two-input interleave of the low halves: even result lanes take %a[0..7] in
; order and odd result lanes take %b[0..7] in order (mask indices 16..23
; address the second operand, %b).
266define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
267; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
268; SSE:       # %bb.0:
269; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
270; SSE-NEXT:    retq
271;
272; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
273; AVX:       # %bb.0:
274; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
275; AVX-NEXT:    retq
276  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
277  ret <16 x i8> %shuffle
278}
279
; Two-input interleave of the high halves: even result lanes take %a[8..15] in
; order and odd result lanes take %b[8..15] in order (mask indices 24..31
; address bytes 8..15 of the second operand, %b).
280define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
281; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
282; SSE:       # %bb.0:
283; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
284; SSE-NEXT:    retq
285;
286; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
287; AVX:       # %bb.0:
288; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
289; AVX-NEXT:    retq
290  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
291  ret <16 x i8> %shuffle
292}
293
; Every even result lane takes byte 0 of %b (mask index 16), while the odd
; result lanes take %a[0..7] in order. In effect a broadcast of %b[0] is
; interleaved with the low half of %a, with the %b element first in each pair.
294define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
295; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
296; SSE:       # %bb.0:
297; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
298; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
299; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
300; SSE-NEXT:    movdqa %xmm1, %xmm0
301; SSE-NEXT:    retq
302;
303; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
304; AVX1:       # %bb.0:
305; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
306; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
307; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
308; AVX1-NEXT:    retq
309;
310; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
311; AVX2OR512VL:       # %bb.0:
312; AVX2OR512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
313; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
314; AVX2OR512VL-NEXT:    retq
315;
316; XOPAVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
317; XOPAVX1:       # %bb.0:
318; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[0],xmm0[1],xmm1[0],xmm0[2],xmm1[0],xmm0[3],xmm1[0],xmm0[4],xmm1[0],xmm0[5],xmm1[0],xmm0[6],xmm1[0],xmm0[7]
319; XOPAVX1-NEXT:    retq
320;
321; XOPAVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
322; XOPAVX2:       # %bb.0:
323; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
324; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
325; XOPAVX2-NEXT:    retq
326  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
327  ret <16 x i8> %shuffle
328}
329
; Full byte reversal of %a: result lane i takes byte 15-i of %a.
; No mask index is >= 16, so %b is never selected.
330define <16 x i8> @shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<16 x i8> %a, <16 x i8> %b) {
331; SSE2-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
332; SSE2:       # %bb.0:
333; SSE2-NEXT:    pxor %xmm1, %xmm1
334; SSE2-NEXT:    movdqa %xmm0, %xmm2
335; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
336; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
337; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
338; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
339; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
340; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
341; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
342; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
343; SSE2-NEXT:    packuswb %xmm2, %xmm0
344; SSE2-NEXT:    retq
345;
346; SSSE3-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
347; SSSE3:       # %bb.0:
348; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
349; SSSE3-NEXT:    retq
350;
351; SSE41-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
352; SSE41:       # %bb.0:
353; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
354; SSE41-NEXT:    retq
355;
356; AVX-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
357; AVX:       # %bb.0:
358; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
359; AVX-NEXT:    retq
360  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
361  ret <16 x i8> %shuffle
362}
363
; Byte reversal within each 8-byte half of %a: lanes 0-7 are %a[7..0] and
; lanes 8-15 are %a[15..8]; the two halves themselves stay in place.
; No mask index is >= 16, so %b is never selected.
364define <16 x i8> @shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08(<16 x i8> %a, <16 x i8> %b) {
365; SSE2-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08:
366; SSE2:       # %bb.0:
367; SSE2-NEXT:    pxor %xmm1, %xmm1
368; SSE2-NEXT:    movdqa %xmm0, %xmm2
369; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
370; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
371; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
372; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
373; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
374; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
375; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
376; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
377; SSE2-NEXT:    packuswb %xmm2, %xmm0
378; SSE2-NEXT:    retq
379;
380; SSSE3-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08:
381; SSSE3:       # %bb.0:
382; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
383; SSSE3-NEXT:    retq
384;
385; SSE41-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08:
386; SSE41:       # %bb.0:
387; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
388; SSE41-NEXT:    retq
389;
390; AVX-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08:
391; AVX:       # %bb.0:
392; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
393; AVX-NEXT:    retq
394  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
395  ret <16 x i8> %shuffle
396}
397
; Byte reversal within each 4-byte group of %a: the groups [0..3], [4..7],
; [8..11] and [12..15] stay in place while the bytes inside each are reversed.
; No mask index is >= 16, so %b is never selected.
398define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
399; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
400; SSE2:       # %bb.0:
401; SSE2-NEXT:    pxor %xmm1, %xmm1
402; SSE2-NEXT:    movdqa %xmm0, %xmm2
403; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
404; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
405; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
406; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
407; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
408; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
409; SSE2-NEXT:    packuswb %xmm2, %xmm0
410; SSE2-NEXT:    retq
411;
412; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
413; SSSE3:       # %bb.0:
414; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
415; SSSE3-NEXT:    retq
416;
417; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
418; SSE41:       # %bb.0:
419; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
420; SSE41-NEXT:    retq
421;
422; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
423; AVX:       # %bb.0:
424; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
425; AVX-NEXT:    retq
426  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
427  ret <16 x i8> %shuffle
428}
429
; Swaps the two bytes of every adjacent byte pair of %a: result lanes 2k and
; 2k+1 take %a[2k+1] and %a[2k] respectively.
; No mask index is >= 16, so %b is never selected.
430define <16 x i8> @shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14(<16 x i8> %a, <16 x i8> %b) {
431; SSE2-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
432; SSE2:       # %bb.0:
433; SSE2-NEXT:    movdqa %xmm0, %xmm1
434; SSE2-NEXT:    psrlw $8, %xmm1
435; SSE2-NEXT:    psllw $8, %xmm0
436; SSE2-NEXT:    por %xmm1, %xmm0
437; SSE2-NEXT:    retq
438;
439; SSSE3-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
440; SSSE3:       # %bb.0:
441; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
442; SSSE3-NEXT:    retq
443;
444; SSE41-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
445; SSE41:       # %bb.0:
446; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
447; SSE41-NEXT:    retq
448;
449; AVX1-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
450; AVX1:       # %bb.0:
451; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
452; AVX1-NEXT:    retq
453;
454; AVX2OR512VL-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
455; AVX2OR512VL:       # %bb.0:
456; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
457; AVX2OR512VL-NEXT:    retq
458;
459; XOP-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14:
460; XOP:       # %bb.0:
461; XOP-NEXT:    vprotw $8, %xmm0, %xmm0
462; XOP-NEXT:    retq
463  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
464  ret <16 x i8> %shuffle
465}
466
; Two-input shuffle built from the low 8 bytes of each operand, with the bytes
; of every 4-byte group reversed: result lanes 0-7 are %a[3..0],%a[7..4] and
; result lanes 8-15 are %b[3..0],%b[7..4] (mask indices 16..23 address %b[0..7]).
467define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
468; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
469; SSE2:       # %bb.0:
470; SSE2-NEXT:    pxor %xmm2, %xmm2
471; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
472; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
473; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
474; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
475; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
476; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
477; SSE2-NEXT:    packuswb %xmm1, %xmm0
478; SSE2-NEXT:    retq
479;
480; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
481; SSSE3:       # %bb.0:
482; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
483; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
484; SSSE3-NEXT:    retq
485;
486; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
487; SSE41:       # %bb.0:
488; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
489; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
490; SSE41-NEXT:    retq
491;
492; AVX1-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
493; AVX1:       # %bb.0:
494; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
495; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
496; AVX1-NEXT:    retq
497;
498; AVX2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
499; AVX2:       # %bb.0:
500; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
501; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
502; AVX2-NEXT:    retq
503;
504; AVX512VLBW-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
505; AVX512VLBW:       # %bb.0:
506; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
507; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
508; AVX512VLBW-NEXT:    retq
509;
510; AVX512VLVBMI-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
511; AVX512VLVBMI:       # %bb.0:
512; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,19,18,17,16,23,22,21,20]
513; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
514; AVX512VLVBMI-NEXT:    retq
515;
516; XOP-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
517; XOP:       # %bb.0:
518; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],xmm1[3,2,1,0,7,6,5,4]
519; XOP-NEXT:    retq
520  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
521  ret <16 x i8> %shuffle
522}
523
; Two-input shuffle that alternates byte-reversed 4-byte groups from the two
; operands: the result is %a[3..0], %b[15..12], %a[11..8], %b[7..4]
; (mask indices 28..31 and 20..23 address %b[12..15] and %b[4..7]).
524define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
525; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
526; SSE2:       # %bb.0:
527; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
528; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
529; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
530; SSE2-NEXT:    pxor %xmm1, %xmm1
531; SSE2-NEXT:    movdqa %xmm0, %xmm2
532; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
533; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
534; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
535; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
536; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
537; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm0[3,2,1,0,4,5,6,7]
538; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,7,6,5,4]
539; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
540; SSE2-NEXT:    packuswb %xmm1, %xmm0
541; SSE2-NEXT:    retq
542;
543; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
544; SSSE3:       # %bb.0:
545; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
546; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
547; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
548; SSSE3-NEXT:    retq
549;
550; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
551; SSE41:       # %bb.0:
552; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
553; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
554; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
555; SSE41-NEXT:    retq
556;
557; AVX1-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
558; AVX1:       # %bb.0:
559; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
560; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
561; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
562; AVX1-NEXT:    retq
563;
564; AVX2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
565; AVX2:       # %bb.0:
566; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
567; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
568; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
569; AVX2-NEXT:    retq
570;
571; AVX512VLBW-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
572; AVX512VLBW:       # %bb.0:
573; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
574; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
575; AVX512VLBW-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
576; AVX512VLBW-NEXT:    retq
577;
578; AVX512VLVBMI-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
579; AVX512VLVBMI:       # %bb.0:
580; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,2,1,0,31,30,29,28,11,10,9,8,23,22,21,20]
581; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
582; AVX512VLVBMI-NEXT:    retq
583;
584; XOP-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
585; XOP:       # %bb.0:
586; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[3,2,1,0],xmm1[15,14,13,12],xmm0[11,10,9,8],xmm1[7,6,5,4]
587; XOP-NEXT:    retq
588  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
589  ret <16 x i8> %shuffle
590}
591
; Byte blend with an alternating pattern: even result bytes are taken from %a
; (mask indices 0,2,...,14) and odd result bytes from %b (mask indices
; 17,19,...,31). Result byte i always comes from byte i of one of the sources.
592define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
593; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
594; SSE2:       # %bb.0:
595; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
596; SSE2-NEXT:    andps %xmm2, %xmm0
597; SSE2-NEXT:    andnps %xmm1, %xmm2
598; SSE2-NEXT:    orps %xmm2, %xmm0
599; SSE2-NEXT:    retq
600;
601; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
602; SSSE3:       # %bb.0:
603; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
604; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
605; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
606; SSSE3-NEXT:    retq
607;
608; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
609; SSE41:       # %bb.0:
610; SSE41-NEXT:    movdqa %xmm0, %xmm2
611; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
612; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
613; SSE41-NEXT:    movdqa %xmm1, %xmm0
614; SSE41-NEXT:    retq
615;
616; AVX1-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
617; AVX1:       # %bb.0:
618; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
619; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
620; AVX1-NEXT:    retq
621;
622; AVX2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
623; AVX2:       # %bb.0:
624; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
625; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
626; AVX2-NEXT:    retq
627;
628; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
629; AVX512VL:       # %bb.0:
630; AVX512VL-NEXT:    movw $-21846, %ax # imm = 0xAAAA
631; AVX512VL-NEXT:    kmovd %eax, %k1
632; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
633; AVX512VL-NEXT:    retq
634;
635; XOPAVX1-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
636; XOPAVX1:       # %bb.0:
637; XOPAVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
638; XOPAVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
639; XOPAVX1-NEXT:    retq
640;
641; XOPAVX2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
642; XOPAVX2:       # %bb.0:
643; XOPAVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
644; XOPAVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
645; XOPAVX2-NEXT:    retq
646  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
647  ret <16 x i8> %shuffle
648}
649
; Byte blend where every fourth result byte (3, 7, 11, 15) is taken from %b
; (mask indices 19, 23, 27, 31) and all other bytes are taken from %a. The
; select pattern repeats every 4 bytes.
650define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
651; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
652; SSE2:       # %bb.0:
653; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
654; SSE2-NEXT:    andps %xmm2, %xmm0
655; SSE2-NEXT:    andnps %xmm1, %xmm2
656; SSE2-NEXT:    orps %xmm2, %xmm0
657; SSE2-NEXT:    retq
658;
659; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
660; SSSE3:       # %bb.0:
661; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
662; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
663; SSSE3-NEXT:    por %xmm1, %xmm0
664; SSSE3-NEXT:    retq
665;
666; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
667; SSE41:       # %bb.0:
668; SSE41-NEXT:    movdqa %xmm0, %xmm2
669; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
670; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
671; SSE41-NEXT:    movdqa %xmm1, %xmm0
672; SSE41-NEXT:    retq
673;
674; AVX1-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
675; AVX1:       # %bb.0:
676; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
677; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
678; AVX1-NEXT:    retq
679;
680; AVX2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
681; AVX2:       # %bb.0:
682; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
683; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
684; AVX2-NEXT:    retq
685;
686; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
687; AVX512VL:       # %bb.0:
688; AVX512VL-NEXT:    movw $-30584, %ax # imm = 0x8888
689; AVX512VL-NEXT:    kmovd %eax, %k1
690; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
691; AVX512VL-NEXT:    retq
692;
693; XOPAVX1-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
694; XOPAVX1:       # %bb.0:
695; XOPAVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
696; XOPAVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
697; XOPAVX1-NEXT:    retq
698;
699; XOPAVX2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
700; XOPAVX2:       # %bb.0:
701; XOPAVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
702; XOPAVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
703; XOPAVX2-NEXT:    retq
704  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
705  ret <16 x i8> %shuffle
706}
707
; Blend of %a with an all-zero vector: the second shuffle operand is
; zeroinitializer, so result bytes 3, 7, 11 and 15 (mask indices 19, 23, 27,
; 31; "zz" in the function name) are zero and every other byte is the matching
; byte of %a. All checked targets expect a single AND with a constant-pool
; operand.
708define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
709; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
710; SSE:       # %bb.0:
711; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
712; SSE-NEXT:    retq
713;
714; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
715; AVX1OR2:       # %bb.0:
716; AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
717; AVX1OR2-NEXT:    retq
718;
719; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
720; AVX512VL:       # %bb.0:
721; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
722; AVX512VL-NEXT:    retq
723  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
724  ret <16 x i8> %shuffle
725}
726
; Byte blend where result bytes 4, 7, 12 and 15 are taken from %b (mask
; indices 20, 23, 28, 31) and all other bytes are taken from %a. The select
; pattern repeats every 8 bytes.
727define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
728; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
729; SSE2:       # %bb.0:
730; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
731; SSE2-NEXT:    andps %xmm2, %xmm0
732; SSE2-NEXT:    andnps %xmm1, %xmm2
733; SSE2-NEXT:    orps %xmm2, %xmm0
734; SSE2-NEXT:    retq
735;
736; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
737; SSSE3:       # %bb.0:
738; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
739; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
740; SSSE3-NEXT:    por %xmm1, %xmm0
741; SSSE3-NEXT:    retq
742;
743; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
744; SSE41:       # %bb.0:
745; SSE41-NEXT:    movdqa %xmm0, %xmm2
746; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
747; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
748; SSE41-NEXT:    movdqa %xmm1, %xmm0
749; SSE41-NEXT:    retq
750;
751; AVX1-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
752; AVX1:       # %bb.0:
753; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
754; AVX1-NEXT:    # xmm2 = mem[0,0]
755; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
756; AVX1-NEXT:    retq
757;
758; AVX2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
759; AVX2:       # %bb.0:
760; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
761; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
762; AVX2-NEXT:    retq
763;
764; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
765; AVX512VL:       # %bb.0:
766; AVX512VL-NEXT:    movw $-28528, %ax # imm = 0x9090
767; AVX512VL-NEXT:    kmovd %eax, %k1
768; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
769; AVX512VL-NEXT:    retq
770;
771; XOPAVX1-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
772; XOPAVX1:       # %bb.0:
773; XOPAVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
774; XOPAVX1-NEXT:    # xmm2 = mem[0,0]
775; XOPAVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
776; XOPAVX1-NEXT:    retq
777;
778; XOPAVX2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
779; XOPAVX2:       # %bb.0:
780; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
781; XOPAVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
782; XOPAVX2-NEXT:    retq
783  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
784  ret <16 x i8> %shuffle
785}
786
; Byte blend with a non-repeating select pattern: result bytes 0-3, 8, 9, 12
; and 14 are taken from %b (mask indices 16-19, 24, 25, 28, 30) and the
; remaining bytes (4-7, 10, 11, 13, 15) from %a. The AVX1OR2 checks expect the
; blend mask to be loaded as a full 16-byte constant (vmovdqa).
787define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
788; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
789; SSE2:       # %bb.0:
790; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
791; SSE2-NEXT:    andps %xmm2, %xmm1
792; SSE2-NEXT:    andnps %xmm0, %xmm2
793; SSE2-NEXT:    orps %xmm1, %xmm2
794; SSE2-NEXT:    movaps %xmm2, %xmm0
795; SSE2-NEXT:    retq
796;
797; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
798; SSSE3:       # %bb.0:
799; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
800; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
801; SSSE3-NEXT:    por %xmm1, %xmm0
802; SSSE3-NEXT:    retq
803;
804; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
805; SSE41:       # %bb.0:
806; SSE41-NEXT:    movdqa %xmm0, %xmm2
807; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
808; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
809; SSE41-NEXT:    movdqa %xmm2, %xmm0
810; SSE41-NEXT:    retq
811;
812; AVX1OR2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
813; AVX1OR2:       # %bb.0:
814; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
815; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
816; AVX1OR2-NEXT:    retq
817;
818; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
819; AVX512VL:       # %bb.0:
820; AVX512VL-NEXT:    movw $-21264, %ax # imm = 0xACF0
821; AVX512VL-NEXT:    kmovd %eax, %k1
822; AVX512VL-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
823; AVX512VL-NEXT:    retq
824  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
825  ret <16 x i8> %shuffle
826}
827
; Two-input shuffle in which only the first two result bytes are defined:
; byte 0 is byte 2 of %a and byte 1 is byte 4 of %b (mask index 20). The other
; fourteen mask elements are poison ("uu" in the function name), so the
; lowering is free to put anything in those bytes.
828define <16 x i8> @shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a, <16 x i8> %b)  {
829; SSE2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
830; SSE2:       # %bb.0:
831; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
832; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
833; SSE2-NEXT:    psrlq $16, %xmm0
834; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
835; SSE2-NEXT:    packuswb %xmm0, %xmm0
836; SSE2-NEXT:    retq
837;
838; SSSE3-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
839; SSSE3:       # %bb.0:
840; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
841; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
842; SSSE3-NEXT:    retq
843;
844; SSE41-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
845; SSE41:       # %bb.0:
846; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
847; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
848; SSE41-NEXT:    retq
849;
850; AVX1-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
851; AVX1:       # %bb.0:
852; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
853; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
854; AVX1-NEXT:    retq
855;
856; AVX2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
857; AVX2:       # %bb.0:
858; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
859; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
860; AVX2-NEXT:    retq
861;
862; AVX512VLBW-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
863; AVX512VLBW:       # %bb.0:
864; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
865; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
866; AVX512VLBW-NEXT:    retq
867;
868; AVX512VLVBMI-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
869; AVX512VLVBMI:       # %bb.0:
870; AVX512VLVBMI-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [2,20,2,20,2,20,2,20,2,20,2,20,2,20,2,20]
871; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
872; AVX512VLVBMI-NEXT:    retq
873;
874; XOP-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
875; XOP:       # %bb.0:
876; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[2],xmm1[4],xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u]
877; XOP-NEXT:    retq
878  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 20, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
879  ret <16 x i8> %shuffle
880}
881
882; PR39387
; Two-input shuffle that is close to, but not exactly, a byte rotation of the
; concatenated inputs: result bytes 0-5 are bytes 5-10 of %a, bytes 6-10 are
; bytes 11,12,13,14,14 of %b, and bytes 11-15 are bytes 1,1,2,3,4 of %a.
; NOTE(review): the function name spells "..._30_31_0_1_..." but the mask
; below has 30 and 1 at result bytes 10 and 11 (duplicating their neighbours).
; The CHECK lines match the mask, not the name; confirm against PR39387 before
; "fixing" either one.
883define <16 x i8> @shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4(<16 x i8> %a, <16 x i8> %b) {
884; SSE2-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
885; SSE2:       # %bb.0:
886; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,255]
887; SSE2-NEXT:    movdqa %xmm0, %xmm3
888; SSE2-NEXT:    pand %xmm2, %xmm3
889; SSE2-NEXT:    pandn %xmm1, %xmm2
890; SSE2-NEXT:    por %xmm3, %xmm2
891; SSE2-NEXT:    pxor %xmm1, %xmm1
892; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
893; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
894; SSE2-NEXT:    movdqa %xmm0, %xmm1
895; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,0,0,65535]
896; SSE2-NEXT:    pand %xmm3, %xmm0
897; SSE2-NEXT:    pandn %xmm2, %xmm3
898; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9]
899; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
900; SSE2-NEXT:    por %xmm2, %xmm1
901; SSE2-NEXT:    por %xmm0, %xmm3
902; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,0]
903; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,5,7]
904; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
905; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,3,4,5,6,7]
906; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
907; SSE2-NEXT:    packuswb %xmm0, %xmm1
908; SSE2-NEXT:    movdqa %xmm1, %xmm0
909; SSE2-NEXT:    retq
910;
911; SSSE3-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
912; SSSE3:       # %bb.0:
913; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
914; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9]
915; SSSE3-NEXT:    retq
916;
917; SSE41-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
918; SSE41:       # %bb.0:
919; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
920; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9]
921; SSE41-NEXT:    retq
922;
923; AVX1-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
924; AVX1:       # %bb.0:
925; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
926; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9]
927; AVX1-NEXT:    retq
928;
929; AVX2-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
930; AVX2:       # %bb.0:
931; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
932; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9]
933; AVX2-NEXT:    retq
934;
935; AVX512VLBW-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
936; AVX512VLBW:       # %bb.0:
937; AVX512VLBW-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
938; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9]
939; AVX512VLBW-NEXT:    retq
940;
941; AVX512VLVBMI-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
942; AVX512VLVBMI:       # %bb.0:
943; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [5,6,7,8,9,10,27,28,29,30,30,1,1,2,3,4]
944; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
945; AVX512VLVBMI-NEXT:    retq
946;
947; XOP-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:
948; XOP:       # %bb.0:
949; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10],xmm1[11,12,13,14,14],xmm0[1,1,2,3,4]
950; XOP-NEXT:    retq
951  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 27, i32 28, i32 29, i32 30, i32 30, i32 1, i32 1, i32 2, i32 3, i32 4>
952  ret <16 x i8> %1
953}
954
; Gathers the even-indexed bytes of both inputs: result bytes 0-7 are bytes
; 0,2,...,14 of %a and result bytes 8-15 are bytes 0,2,...,14 of %b (mask
; indices 16,18,...,30). The SSE/AVX checks expect a mask-and-pack sequence;
; the AVX512VL checks expect the inputs to be joined into a ymm register and
; narrowed with vpmovwb.
955define <16 x i8> @shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<16 x i8> %a, <16 x i8> %b) {
956; SSE2-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
957; SSE2:       # %bb.0:
958; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
959; SSE2-NEXT:    pand %xmm2, %xmm1
960; SSE2-NEXT:    pand %xmm2, %xmm0
961; SSE2-NEXT:    packuswb %xmm1, %xmm0
962; SSE2-NEXT:    retq
963;
964; SSSE3-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
965; SSSE3:       # %bb.0:
966; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
967; SSSE3-NEXT:    pand %xmm2, %xmm1
968; SSSE3-NEXT:    pand %xmm2, %xmm0
969; SSSE3-NEXT:    packuswb %xmm1, %xmm0
970; SSSE3-NEXT:    retq
971;
972; SSE41-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
973; SSE41:       # %bb.0:
974; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
975; SSE41-NEXT:    pand %xmm2, %xmm1
976; SSE41-NEXT:    pand %xmm2, %xmm0
977; SSE41-NEXT:    packuswb %xmm1, %xmm0
978; SSE41-NEXT:    retq
979;
980; AVX1-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
981; AVX1:       # %bb.0:
982; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
983; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
984; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
985; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
986; AVX1-NEXT:    retq
987;
988; AVX2-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
989; AVX2:       # %bb.0:
990; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
991; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
992; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
993; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
994; AVX2-NEXT:    retq
995;
996; AVX512VL-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
997; AVX512VL:       # %bb.0:
998; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
999; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1000; AVX512VL-NEXT:    vpmovwb %ymm0, %xmm0
1001; AVX512VL-NEXT:    vzeroupper
1002; AVX512VL-NEXT:    retq
1003;
1004; XOP-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1005; XOP:       # %bb.0:
1006; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
1007; XOP-NEXT:    retq
1008  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1009  ret <16 x i8> %shuffle
1010}
1011
; Gathers the odd-indexed bytes of both inputs: result bytes 0-7 are bytes
; 1,3,...,15 of %a and result bytes 8-15 are bytes 1,3,...,15 of %b (mask
; indices 17,19,...,31). The SSE2 checks expect each input to be shifted right
; by 8 bits per word and then packed.
1012define <16 x i8> @shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31(<16 x i8> %a, <16 x i8> %b) {
1013; SSE2-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1014; SSE2:       # %bb.0:
1015; SSE2-NEXT:    psrlw $8, %xmm1
1016; SSE2-NEXT:    psrlw $8, %xmm0
1017; SSE2-NEXT:    packuswb %xmm1, %xmm0
1018; SSE2-NEXT:    retq
1019;
1020; SSSE3-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1021; SSSE3:       # %bb.0:
1022; SSSE3-NEXT:    movq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
1023; SSSE3-NEXT:    pshufb %xmm2, %xmm1
1024; SSSE3-NEXT:    pshufb %xmm2, %xmm0
1025; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1026; SSSE3-NEXT:    retq
1027;
1028; SSE41-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1029; SSE41:       # %bb.0:
1030; SSE41-NEXT:    movq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
1031; SSE41-NEXT:    pshufb %xmm2, %xmm1
1032; SSE41-NEXT:    pshufb %xmm2, %xmm0
1033; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1034; SSE41-NEXT:    retq
1035;
1036; AVX1-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1037; AVX1:       # %bb.0:
1038; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
1039; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1040; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1041; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1042; AVX1-NEXT:    retq
1043;
1044; AVX2-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1045; AVX2:       # %bb.0:
1046; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
1047; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1048; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1049; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1050; AVX2-NEXT:    retq
1051;
1052; AVX512VLBW-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1053; AVX512VLBW:       # %bb.0:
1054; AVX512VLBW-NEXT:    vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
1055; AVX512VLBW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1056; AVX512VLBW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1057; AVX512VLBW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1058; AVX512VLBW-NEXT:    retq
1059;
1060; AVX512VLVBMI-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1061; AVX512VLVBMI:       # %bb.0:
1062; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
1063; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
1064; AVX512VLVBMI-NEXT:    retq
1065;
1066; XOP-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
1067; XOP:       # %bb.0:
1068; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15]
1069; XOP-NEXT:    retq
1070  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1071  ret <16 x i8> %shuffle
1072}
1073
1074; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780
1075
; Byte blend whose first operand %x is loaded from %px (16-byte aligned):
; result bytes 2, 4, 5, 6, 10, 12, 13 and 14 come from the loaded %x and the
; remaining bytes come from the register argument %y (mask indices >= 16).
; The SSE4.1, AVX and XOP checks expect (%rdi) to appear directly as the
; memory operand of the blend; AVX512VL expects a masked load from (%rdi).
1076define <16 x i8> @load_fold_pblendvb(ptr %px, <16 x i8> %y) {
1077; SSE2-LABEL: load_fold_pblendvb:
1078; SSE2:       # %bb.0:
1079; SSE2-NEXT:    movaps {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1080; SSE2-NEXT:    andps %xmm1, %xmm0
1081; SSE2-NEXT:    andnps (%rdi), %xmm1
1082; SSE2-NEXT:    orps %xmm1, %xmm0
1083; SSE2-NEXT:    retq
1084;
1085; SSSE3-LABEL: load_fold_pblendvb:
1086; SSSE3:       # %bb.0:
1087; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1088; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[3],zero,zero,zero,xmm0[7,8,9],zero,xmm0[11],zero,zero,zero,xmm0[15]
1089; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,xmm1[2],zero,xmm1[4,5,6],zero,zero,zero,xmm1[10],zero,xmm1[12,13,14],zero
1090; SSSE3-NEXT:    por %xmm1, %xmm0
1091; SSSE3-NEXT:    retq
1092;
1093; SSE41-LABEL: load_fold_pblendvb:
1094; SSE41:       # %bb.0:
1095; SSE41-NEXT:    movdqa %xmm0, %xmm1
1096; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
1097; SSE41-NEXT:    pblendvb %xmm0, (%rdi), %xmm1
1098; SSE41-NEXT:    movdqa %xmm1, %xmm0
1099; SSE41-NEXT:    retq
1100;
1101; AVX1-LABEL: load_fold_pblendvb:
1102; AVX1:       # %bb.0:
1103; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
1104; AVX1-NEXT:    # xmm1 = mem[0,0]
1105; AVX1-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1106; AVX1-NEXT:    retq
1107;
1108; AVX2-LABEL: load_fold_pblendvb:
1109; AVX2:       # %bb.0:
1110; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
1111; AVX2-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1112; AVX2-NEXT:    retq
1113;
1114; AVX512VL-LABEL: load_fold_pblendvb:
1115; AVX512VL:       # %bb.0:
1116; AVX512VL-NEXT:    movw $29812, %ax # imm = 0x7474
1117; AVX512VL-NEXT:    kmovd %eax, %k1
1118; AVX512VL-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1}
1119; AVX512VL-NEXT:    retq
1120;
1121; XOPAVX1-LABEL: load_fold_pblendvb:
1122; XOPAVX1:       # %bb.0:
1123; XOPAVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
1124; XOPAVX1-NEXT:    # xmm1 = mem[0,0]
1125; XOPAVX1-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1126; XOPAVX1-NEXT:    retq
1127;
1128; XOPAVX2-LABEL: load_fold_pblendvb:
1129; XOPAVX2:       # %bb.0:
1130; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
1131; XOPAVX2-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1132; XOPAVX2-NEXT:    retq
1133  %x = load <16 x i8>, ptr %px, align 16
1134  %select = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
1135  ret <16 x i8> %select
1136}
1137
; Commuted variant of load_fold_pblendvb: the same mask is used, but the
; loaded vector %x is now the second shuffle operand. Result bytes 0, 1, 3, 7,
; 8, 9, 11 and 15 therefore come from the loaded %x (mask indices >= 16) and
; the remaining bytes come from the register argument %y. The SSE4.1, AVX and
; XOP checks still expect (%rdi) as the blend's memory operand, with the
; constant mask inverted relative to load_fold_pblendvb; the AVX512VL checks
; expect a separate vmovdqa load followed by a masked register move.
1138define <16 x i8> @load_fold_pblendvb_commute(ptr %px, <16 x i8> %y) {
1139; SSE2-LABEL: load_fold_pblendvb_commute:
1140; SSE2:       # %bb.0:
1141; SSE2-NEXT:    movaps {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1142; SSE2-NEXT:    movaps %xmm1, %xmm2
1143; SSE2-NEXT:    andnps %xmm0, %xmm2
1144; SSE2-NEXT:    andps (%rdi), %xmm1
1145; SSE2-NEXT:    orps %xmm2, %xmm1
1146; SSE2-NEXT:    movaps %xmm1, %xmm0
1147; SSE2-NEXT:    retq
1148;
1149; SSSE3-LABEL: load_fold_pblendvb_commute:
1150; SSSE3:       # %bb.0:
1151; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1152; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,xmm0[4,5,6],zero,zero,zero,xmm0[10],zero,xmm0[12,13,14],zero
1153; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3],zero,zero,zero,xmm1[7,8,9],zero,xmm1[11],zero,zero,zero,xmm1[15]
1154; SSSE3-NEXT:    por %xmm1, %xmm0
1155; SSSE3-NEXT:    retq
1156;
1157; SSE41-LABEL: load_fold_pblendvb_commute:
1158; SSE41:       # %bb.0:
1159; SSE41-NEXT:    movdqa %xmm0, %xmm1
1160; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1161; SSE41-NEXT:    pblendvb %xmm0, (%rdi), %xmm1
1162; SSE41-NEXT:    movdqa %xmm1, %xmm0
1163; SSE41-NEXT:    retq
1164;
1165; AVX1-LABEL: load_fold_pblendvb_commute:
1166; AVX1:       # %bb.0:
1167; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1168; AVX1-NEXT:    # xmm1 = mem[0,0]
1169; AVX1-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1170; AVX1-NEXT:    retq
1171;
1172; AVX2-LABEL: load_fold_pblendvb_commute:
1173; AVX2:       # %bb.0:
1174; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1175; AVX2-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1176; AVX2-NEXT:    retq
1177;
1178; AVX512VL-LABEL: load_fold_pblendvb_commute:
1179; AVX512VL:       # %bb.0:
1180; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm1
1181; AVX512VL-NEXT:    movw $29812, %ax # imm = 0x7474
1182; AVX512VL-NEXT:    kmovd %eax, %k1
1183; AVX512VL-NEXT:    vmovdqu8 %xmm0, %xmm1 {%k1}
1184; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
1185; AVX512VL-NEXT:    retq
1186;
1187; XOPAVX1-LABEL: load_fold_pblendvb_commute:
1188; XOPAVX1:       # %bb.0:
1189; XOPAVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1190; XOPAVX1-NEXT:    # xmm1 = mem[0,0]
1191; XOPAVX1-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1192; XOPAVX1-NEXT:    retq
1193;
1194; XOPAVX2-LABEL: load_fold_pblendvb_commute:
1195; XOPAVX2:       # %bb.0:
1196; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
1197; XOPAVX2-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
1198; XOPAVX2-NEXT:    retq
1199  %x = load <16 x i8>, ptr %px, align 16
1200  %select = shufflevector <16 x i8> %y, <16 x i8> %x, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
1201  ret <16 x i8> %select
1202}
1203
; Gathers bytes 0, 4, 8 and 12 of %a into lanes 0-3; lanes 4-15 are poison.
; With AVX512VL the expected output is a single vpmovdb.
1204define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
1205; SSE2-LABEL: trunc_v4i32_shuffle:
1206; SSE2:       # %bb.0:
1207; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1208; SSE2-NEXT:    packuswb %xmm0, %xmm0
1209; SSE2-NEXT:    packuswb %xmm0, %xmm0
1210; SSE2-NEXT:    retq
1211;
1212; SSSE3-LABEL: trunc_v4i32_shuffle:
1213; SSSE3:       # %bb.0:
1214; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
1215; SSSE3-NEXT:    retq
1216;
1217; SSE41-LABEL: trunc_v4i32_shuffle:
1218; SSE41:       # %bb.0:
1219; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
1220; SSE41-NEXT:    retq
1221;
1222; AVX1OR2-LABEL: trunc_v4i32_shuffle:
1223; AVX1OR2:       # %bb.0:
1224; AVX1OR2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
1225; AVX1OR2-NEXT:    retq
1226;
1227; AVX512VL-LABEL: trunc_v4i32_shuffle:
1228; AVX512VL:       # %bb.0:
1229; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
1230; AVX512VL-NEXT:    retq
1231  %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1232  ret <16 x i8> %shuffle
1233}
1234
; Chain of ten dependent shufflevectors with irregular masks. Only arguments
; %s.0.3, %s.0.4, %s.0.5, %s.0.6, %s.0.8 and %s.0.9 are read; %s.0.1, %s.0.2
; and %s.0.7 are unused. The only thing verified is that a retq is emitted.
1235define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
1236; We don't have anything useful to check here. This generates 100s of
1237; instructions. Instead, just make sure we survived codegen.
1238; ALL-LABEL: stress_test0:
1239; ALL:         retq
1240entry:
1241  %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
1242  %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
1243  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
1244  %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
1245  %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
1246  %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
1247  %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
1248  %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
1249  %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 poison, i32 20, i32 poison, i32 3, i32 27, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
1250  %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 poison, i32 poison, i32 31, i32 1, i32 poison, i32 10>
1251  ret <16 x i8> %s.16.0
1252}
1253
; The returned %s.12.4 defines only lane 12, which reads lane 13 of %s.10.4.
; %s.10.4 itself defines only lane 1, so every lane of the result is poison
; and the expected output is a bare retq.
1254define <16 x i8> @poison_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
1255; There is nothing interesting to check about these instructions other than
1256; that they survive codegen. However, we actually do better and delete all of
1257; them because the result is 'poison'.
1258;
1259; ALL-LABEL: poison_test1:
1260; ALL:       # %bb.0: # %entry
1261; ALL-NEXT:    retq
1262entry:
1263  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> poison, <16 x i32> <i32 9, i32 9, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 6, i32 poison, i32 6, i32 poison, i32 14, i32 14, i32 poison, i32 poison, i32 0>
1264  %s.2.4 = shufflevector <16 x i8> poison, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 poison, i32 poison, i32 19, i32 poison, i32 poison, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 poison, i32 20, i32 22>
1265  %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> poison, <16 x i32> <i32 3, i32 8, i32 poison, i32 7, i32 poison, i32 10, i32 8, i32 0, i32 15, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 9>
1266  %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> poison, <16 x i32> <i32 7, i32 poison, i32 14, i32 7, i32 8, i32 poison, i32 7, i32 8, i32 5, i32 15, i32 poison, i32 1, i32 11, i32 poison, i32 poison, i32 11>
1267  %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
1268  %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> poison, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 poison, i32 4, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6, i32 poison>
1269  %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
1270  %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6, i32 10, i32 poison, i32 0, i32 5, i32 poison, i32 9, i32 poison>
1271  %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1272  %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 28, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1273  %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 10, i32 5>
1274  %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1275  %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 13, i32 poison, i32 poison, i32 poison>
1276
1277  ret <16 x i8> %s.12.4
1278}
1279
; Widens an <8 x i8> input to 16 lanes: lanes 0-7 copy %a, lanes 8-15 all use
; mask index 8, i.e. element 0 of the zeroinitializer operand. The expected
; output is a single movq/vmovq that zeroes the upper half.
1280define <16 x i8> @PR20540(<8 x i8> %a) {
1281; SSE-LABEL: PR20540:
1282; SSE:       # %bb.0:
1283; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
1284; SSE-NEXT:    retq
1285;
1286; AVX-LABEL: PR20540:
1287; AVX:       # %bb.0:
1288; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1289; AVX-NEXT:    retq
1290  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1291  ret <16 x i8> %shuffle
1292}
1293
; Lane 0 takes the scalar %i (element 0 of %a, mask index 16); lanes 1-15 come
; from the zeroinitializer operand.
1294define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
1295; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1296; SSE:       # %bb.0:
1297; SSE-NEXT:    movzbl %dil, %eax
1298; SSE-NEXT:    movd %eax, %xmm0
1299; SSE-NEXT:    retq
1300;
1301; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1302; AVX:       # %bb.0:
1303; AVX-NEXT:    movzbl %dil, %eax
1304; AVX-NEXT:    vmovd %eax, %xmm0
1305; AVX-NEXT:    retq
1306  %a = insertelement <16 x i8> poison, i8 %i, i32 0
1307  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1308  ret <16 x i8> %shuffle
1309}
1310
; The scalar %i (element 0 of %a, mask index 16) lands in lane 5; every other
; lane reads element 0 of the zeroinitializer operand.
1311define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
1312; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1313; SSE2:       # %bb.0:
1314; SSE2-NEXT:    shll $8, %edi
1315; SSE2-NEXT:    pxor %xmm0, %xmm0
1316; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
1317; SSE2-NEXT:    retq
1318;
1319; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1320; SSSE3:       # %bb.0:
1321; SSSE3-NEXT:    shll $8, %edi
1322; SSSE3-NEXT:    pxor %xmm0, %xmm0
1323; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
1324; SSSE3-NEXT:    retq
1325;
1326; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1327; SSE41:       # %bb.0:
1328; SSE41-NEXT:    pxor %xmm0, %xmm0
1329; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
1330; SSE41-NEXT:    retq
1331;
1332; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1333; AVX:       # %bb.0:
1334; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1335; AVX-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
1336; AVX-NEXT:    retq
1337  %a = insertelement <16 x i8> poison, i8 %i, i32 0
1338  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1339  ret <16 x i8> %shuffle
1340}
1341
; The scalar %i (element 0 of %a, mask index 16) lands in lane 15. Lanes 1, 2,
; 4 and 5 are poison; the remaining lanes come from the zeroinitializer operand.
1342define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
1343; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
1344; SSE2:       # %bb.0:
1345; SSE2-NEXT:    shll $8, %edi
1346; SSE2-NEXT:    pxor %xmm0, %xmm0
1347; SSE2-NEXT:    pinsrw $7, %edi, %xmm0
1348; SSE2-NEXT:    retq
1349;
1350; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
1351; SSSE3:       # %bb.0:
1352; SSSE3-NEXT:    shll $8, %edi
1353; SSSE3-NEXT:    pxor %xmm0, %xmm0
1354; SSSE3-NEXT:    pinsrw $7, %edi, %xmm0
1355; SSSE3-NEXT:    retq
1356;
1357; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
1358; SSE41:       # %bb.0:
1359; SSE41-NEXT:    pxor %xmm0, %xmm0
1360; SSE41-NEXT:    pinsrb $15, %edi, %xmm0
1361; SSE41-NEXT:    retq
1362;
1363; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
1364; AVX:       # %bb.0:
1365; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1366; AVX-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
1367; AVX-NEXT:    retq
1368  %a = insertelement <16 x i8> poison, i8 %i, i32 0
1369  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
1370  ret <16 x i8> %shuffle
1371}
1372
; %i is inserted at element 3 of %a, and mask index 19 (element 3 of the second
; operand) moves it into lane 2; all other lanes come from the zero operand.
1373define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
1374; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1375; SSE2:       # %bb.0:
1376; SSE2-NEXT:    movzbl %dil, %eax
1377; SSE2-NEXT:    pxor %xmm0, %xmm0
1378; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
1379; SSE2-NEXT:    retq
1380;
1381; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1382; SSSE3:       # %bb.0:
1383; SSSE3-NEXT:    movzbl %dil, %eax
1384; SSSE3-NEXT:    pxor %xmm0, %xmm0
1385; SSSE3-NEXT:    pinsrw $1, %eax, %xmm0
1386; SSSE3-NEXT:    retq
1387;
1388; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1389; SSE41:       # %bb.0:
1390; SSE41-NEXT:    pxor %xmm0, %xmm0
1391; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
1392; SSE41-NEXT:    retq
1393;
1394; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1395; AVX:       # %bb.0:
1396; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1397; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
1398; AVX-NEXT:    retq
1399  %a = insertelement <16 x i8> poison, i8 %i, i32 3
1400  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1401  ret <16 x i8> %shuffle
1402}
1403
; Lanes 12 and 14 take bytes 0 and 2 of %a, lanes 13 and 15 are poison, and
; lanes 0-11 are zero. The expected output is a single whole-register byte
; shift left (pslldq/vpslldq).
1404define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
1405; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
1406; SSE:       # %bb.0:
1407; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1408; SSE-NEXT:    retq
1409;
1410; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
1411; AVX:       # %bb.0:
1412; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1413; AVX-NEXT:    retq
1414  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison, i32 18, i32 poison>
1415  ret <16 x i8> %shuffle
1416}
1417
; Lanes 0, 2 and 3 take bytes 12, 14 and 15 of %a (mask indices 28, 30, 31),
; lane 1 is poison, and lanes 4-15 read element 0 of the zeroinitializer
; operand. The expected output is a single whole-register byte shift right
; (psrldq/vpsrldq).
1418define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
1419; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1420; SSE:       # %bb.0:
1421; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1422; SSE-NEXT:    retq
1423;
1424; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
1425; AVX:       # %bb.0:
1426; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1427; AVX-NEXT:    retq
1428  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 poison, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1429  ret <16 x i8> %shuffle
1430}
1431
; Two-input byte rotation: lane 0 is byte 15 of %b (mask index 31), lanes 1-15
; are bytes 0-14 of %a. With SSSE3 and later this is expected to be one palignr.
1432define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
1433; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1434; SSE2:       # %bb.0:
1435; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1436; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1437; SSE2-NEXT:    por %xmm1, %xmm0
1438; SSE2-NEXT:    retq
1439;
1440; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1441; SSSE3:       # %bb.0:
1442; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1443; SSSE3-NEXT:    retq
1444;
1445; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1446; SSE41:       # %bb.0:
1447; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1448; SSE41-NEXT:    retq
1449;
1450; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1451; AVX:       # %bb.0:
1452; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1453; AVX-NEXT:    retq
1454  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
1455  ret <16 x i8> %shuffle
1456}
1457
; Single-input byte rotation: lane 0 is byte 15 of %a, followed by bytes 0-14
; of %a. No mask index refers to %b.
1458define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
1459; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1460; SSE2:       # %bb.0:
1461; SSE2-NEXT:    movdqa %xmm0, %xmm1
1462; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1463; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1464; SSE2-NEXT:    por %xmm1, %xmm0
1465; SSE2-NEXT:    retq
1466;
1467; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1468; SSSE3:       # %bb.0:
1469; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1470; SSSE3-NEXT:    retq
1471;
1472; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1473; SSE41:       # %bb.0:
1474; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1475; SSE41-NEXT:    retq
1476;
1477; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
1478; AVX:       # %bb.0:
1479; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1480; AVX-NEXT:    retq
1481  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
1482  ret <16 x i8> %shuffle
1483}
1484
; Two-input byte rotation: lanes 0-14 are bytes 1-15 of %b (mask indices
; 17-31), lane 15 is byte 0 of %a.
1485define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
1486; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
1487; SSE2:       # %bb.0:
1488; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1489; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
1490; SSE2-NEXT:    por %xmm1, %xmm0
1491; SSE2-NEXT:    retq
1492;
1493; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
1494; SSSE3:       # %bb.0:
1495; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
1496; SSSE3-NEXT:    retq
1497;
1498; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
1499; SSE41:       # %bb.0:
1500; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
1501; SSE41-NEXT:    retq
1502;
1503; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
1504; AVX:       # %bb.0:
1505; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
1506; AVX-NEXT:    retq
1507  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
1508  ret <16 x i8> %shuffle
1509}
1510
; Two-input byte rotation: lanes 0-14 are bytes 1-15 of %a, lane 15 is byte 0
; of %b (mask index 16).
1511define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
1512; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
1513; SSE2:       # %bb.0:
1514; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1515; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
1516; SSE2-NEXT:    por %xmm1, %xmm0
1517; SSE2-NEXT:    retq
1518;
1519; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
1520; SSSE3:       # %bb.0:
1521; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
1522; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1523; SSSE3-NEXT:    retq
1524;
1525; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
1526; SSE41:       # %bb.0:
1527; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
1528; SSE41-NEXT:    movdqa %xmm1, %xmm0
1529; SSE41-NEXT:    retq
1530;
1531; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
1532; AVX:       # %bb.0:
1533; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
1534; AVX-NEXT:    retq
1535  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
1536  ret <16 x i8> %shuffle
1537}
1538
; Single-input byte rotation: lanes 0-14 are bytes 1-15 of %a, lane 15 is byte
; 0 of %a. No mask index refers to %b.
1539define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
1540; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
1541; SSE2:       # %bb.0:
1542; SSE2-NEXT:    movdqa %xmm0, %xmm1
1543; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1544; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
1545; SSE2-NEXT:    por %xmm1, %xmm0
1546; SSE2-NEXT:    retq
1547;
1548; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
1549; SSSE3:       # %bb.0:
1550; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
1551; SSSE3-NEXT:    retq
1552;
1553; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
1554; SSE41:       # %bb.0:
1555; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
1556; SSE41-NEXT:    retq
1557;
1558; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
1559; AVX:       # %bb.0:
1560; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
1561; AVX-NEXT:    retq
1562  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
1563  ret <16 x i8> %shuffle
1564}
1565
; Two-input byte rotation: lane 0 is byte 15 of %a, lanes 1-15 are bytes 0-14
; of %b (mask indices 16-30).
1566define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
1567; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
1568; SSE2:       # %bb.0:
1569; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1570; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1571; SSE2-NEXT:    por %xmm1, %xmm0
1572; SSE2-NEXT:    retq
1573;
1574; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
1575; SSSE3:       # %bb.0:
1576; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1577; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1578; SSSE3-NEXT:    retq
1579;
1580; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
1581; SSE41:       # %bb.0:
1582; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1583; SSE41-NEXT:    movdqa %xmm1, %xmm0
1584; SSE41-NEXT:    retq
1585;
1586; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
1587; AVX:       # %bb.0:
1588; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1589; AVX-NEXT:    retq
1590  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
1591  ret <16 x i8> %shuffle
1592}
1593
1594; PR31151
; Byte interleave of the low eight bytes of %val1 and %val2, with the second
; and third 4-byte groups of the interleaved result swapped. The expected
; output is an unpack-low followed by a dword shuffle [0,2,1,3].
1595define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(<16 x i8> %val1, <16 x i8> %val2) {
1596; SSE-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
1597; SSE:       # %bb.0:
1598; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1599; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1600; SSE-NEXT:    retq
1601;
1602; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
1603; AVX:       # %bb.0:
1604; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1605; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1606; AVX-NEXT:    retq
1607  %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23>
1608  ret <16 x i8> %shuffle
1609}
1610
; Bytes 0 and 1 of %a go to lanes 0 and 8; every other lane is poison. The
; SSE4.1 and AVX expectations use a byte-to-qword zero extension (pmovzxbq).
1611define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
1612; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1613; SSE2:       # %bb.0:
1614; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1615; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1616; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1617; SSE2-NEXT:    retq
1618;
1619; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1620; SSSE3:       # %bb.0:
1621; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,u,u,u,u,u,u,u,1,u,u,u,u,u,u,u]
1622; SSSE3-NEXT:    retq
1623;
1624; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1625; SSE41:       # %bb.0:
1626; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1627; SSE41-NEXT:    retq
1628;
1629; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
1630; AVX:       # %bb.0:
1631; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1632; AVX-NEXT:    retq
1633  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1634  ret <16 x i8> %shuffle
1635}
1636
; Bytes 0 and 1 of %a go to lanes 0 and 8; the other lanes (mask indices 17-23
; and 25-31) select from the zeroinitializer operand, so each byte is
; zero-extended to 64 bits.
1637define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
1638; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1639; SSE2:       # %bb.0:
1640; SSE2-NEXT:    pxor %xmm1, %xmm1
1641; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1642; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1643; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1644; SSE2-NEXT:    retq
1645;
1646; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1647; SSSE3:       # %bb.0:
1648; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1649; SSSE3-NEXT:    retq
1650;
1651; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1652; SSE41:       # %bb.0:
1653; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1654; SSE41-NEXT:    retq
1655;
1656; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
1657; AVX:       # %bb.0:
1658; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1659; AVX-NEXT:    retq
1660  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
1661  ret <16 x i8> %shuffle
1662}
1663
; Bytes 0-3 of %a go to lanes 0, 4, 8 and 12; every other lane is poison. The
; SSE4.1 and AVX expectations use a byte-to-dword zero extension (pmovzxbd).
1664define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
1665; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1666; SSE2:       # %bb.0:
1667; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1668; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1669; SSE2-NEXT:    retq
1670;
1671; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1672; SSSE3:       # %bb.0:
1673; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1674; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1675; SSSE3-NEXT:    retq
1676;
1677; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1678; SSE41:       # %bb.0:
1679; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1680; SSE41-NEXT:    retq
1681;
1682; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
1683; AVX:       # %bb.0:
1684; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1685; AVX-NEXT:    retq
1686  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
1687  ret <16 x i8> %shuffle
1688}
1689
; Bytes 0-3 of %a go to lanes 0, 4, 8 and 12; the other lanes select from the
; zeroinitializer operand, so each byte is zero-extended to 32 bits.
1690define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
1691; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1692; SSE2:       # %bb.0:
1693; SSE2-NEXT:    pxor %xmm1, %xmm1
1694; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1695; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1696; SSE2-NEXT:    retq
1697;
1698; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1699; SSSE3:       # %bb.0:
1700; SSSE3-NEXT:    pxor %xmm1, %xmm1
1701; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1702; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1703; SSSE3-NEXT:    retq
1704;
1705; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1706; SSE41:       # %bb.0:
1707; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1708; SSE41-NEXT:    retq
1709;
1710; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
1711; AVX:       # %bb.0:
1712; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1713; AVX-NEXT:    retq
1714  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
1715  ret <16 x i8> %shuffle
1716}
1717
; Bytes 0-7 of %a go to the even lanes; every odd lane is poison. The SSE4.1
; and AVX expectations use a byte-to-word zero extension (pmovzxbw).
1718define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
1719; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1720; SSE2:       # %bb.0:
1721; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1722; SSE2-NEXT:    retq
1723;
1724; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1725; SSSE3:       # %bb.0:
1726; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1727; SSSE3-NEXT:    retq
1728;
1729; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1730; SSE41:       # %bb.0:
1731; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1732; SSE41-NEXT:    retq
1733;
1734; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
1735; AVX:       # %bb.0:
1736; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1737; AVX-NEXT:    retq
1738  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison, i32 4, i32 poison, i32 5, i32 poison, i32 6, i32 poison, i32 7, i32 poison>
1739  ret <16 x i8> %shuffle
1740}
1741
; Byte-to-word zero-extension pattern: even result bytes take bytes 0..7 of %a
; and odd result bytes read the zeroinitializer second operand (odd indices
; 17..31). SSE4.1 and AVX expect a single (v)pmovzxbw; SSE2 and SSSE3 expect
; punpcklbw against a register zeroed with pxor.
1742define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
1743; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1744; SSE2:       # %bb.0:
1745; SSE2-NEXT:    pxor %xmm1, %xmm1
1746; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1747; SSE2-NEXT:    retq
1748;
1749; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1750; SSSE3:       # %bb.0:
1751; SSSE3-NEXT:    pxor %xmm1, %xmm1
1752; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1753; SSSE3-NEXT:    retq
1754;
1755; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1756; SSE41:       # %bb.0:
1757; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1758; SSE41-NEXT:    retq
1759;
1760; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
1761; AVX:       # %bb.0:
1762; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1763; AVX-NEXT:    retq
1764  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
1765  ret <16 x i8> %shuffle
1766}
1767
; Irregular two-input byte shuffle. Result byte 0 is poison; positions 4, 8
; and 12 read %b (mask indices 22, 18, 18, i.e. bytes 6, 2, 2 of %b); all the
; remaining positions read %a.
; Expected lowerings: SSSE3 through AVX512VL+BW use two pshufb masks merged
; with por; AVX512 VBMI uses one vpermt2b with a constant index vector; XOP
; uses one vpperm; plain SSE2 needs a long unpack/shuffle/mask/pack sequence.
1768define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
1769; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1770; SSE2:       # %bb.0: # %entry
1771; SSE2-NEXT:    pxor %xmm2, %xmm2
1772; SSE2-NEXT:    movdqa %xmm0, %xmm3
1773; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1774; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm3[1,3,2,0,4,5,6,7]
1775; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,0,2,1]
1776; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
1777; SSE2-NEXT:    pand %xmm5, %xmm4
1778; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1779; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,3,0,1]
1780; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[2,2,2,2,4,5,6,7]
1781; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
1782; SSE2-NEXT:    pandn %xmm2, %xmm5
1783; SSE2-NEXT:    por %xmm4, %xmm5
1784; SSE2-NEXT:    psrlq $16, %xmm0
1785; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1786; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,1,3]
1787; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1788; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
1789; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
1790; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1791; SSE2-NEXT:    packuswb %xmm5, %xmm2
1792; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
1793; SSE2-NEXT:    pand %xmm0, %xmm2
1794; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
1795; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1796; SSE2-NEXT:    pandn %xmm1, %xmm0
1797; SSE2-NEXT:    por %xmm2, %xmm0
1798; SSE2-NEXT:    retq
1799;
1800; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1801; SSSE3:       # %bb.0: # %entry
1802; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1803; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1804; SSSE3-NEXT:    por %xmm1, %xmm0
1805; SSSE3-NEXT:    retq
1806;
1807; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1808; SSE41:       # %bb.0: # %entry
1809; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1810; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1811; SSE41-NEXT:    por %xmm1, %xmm0
1812; SSE41-NEXT:    retq
1813;
1814; AVX1-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1815; AVX1:       # %bb.0: # %entry
1816; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1817; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1818; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1819; AVX1-NEXT:    retq
1820;
1821; AVX2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1822; AVX2:       # %bb.0: # %entry
1823; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1824; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1825; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1826; AVX2-NEXT:    retq
1827;
1828; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1829; AVX512VLBW:       # %bb.0: # %entry
1830; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1831; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1832; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
1833; AVX512VLBW-NEXT:    retq
1834;
1835; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1836; AVX512VLVBMI:       # %bb.0: # %entry
1837; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0]
1838; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
1839; AVX512VLVBMI-NEXT:    retq
1840;
1841; XOP-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
1842; XOP:       # %bb.0: # %entry
1843; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[u,10,2,7],xmm1[6],xmm0[14,7,2],xmm1[2],xmm0[3,1,14],xmm1[2],xmm0[9,11,0]
1844; XOP-NEXT:    retq
1845entry:
1846  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 poison, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
1847
1848  ret <16 x i8> %shuffle
1849}
1850
; Each <8 x i16> input is logically shifted right by 8, bitcast to bytes, and
; the even-numbered bytes of both results are concatenated. That yields the
; high byte of every i16 element of %a0 followed by those of %a1.
; SSE and AVX expectations keep the shifts and pack with (v)packuswb; the XOP
; expectation folds everything into one vpperm that reads the odd bytes of the
; unshifted inputs.
; NOTE(review): the function name is spelled "shuffe" (missing "l"), unlike
; its siblings. It is left unchanged here because every label check below
; repeats the same spelling.
1851define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) {
1852; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1853; SSE:       # %bb.0:
1854; SSE-NEXT:    psrlw $8, %xmm0
1855; SSE-NEXT:    psrlw $8, %xmm1
1856; SSE-NEXT:    packuswb %xmm1, %xmm0
1857; SSE-NEXT:    retq
1858;
1859; AVX1-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1860; AVX1:       # %bb.0:
1861; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
1862; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
1863; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
1864; AVX1-NEXT:    retq
1865;
1866; AVX2OR512VL-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1867; AVX2OR512VL:       # %bb.0:
1868; AVX2OR512VL-NEXT:    vpsrlw $8, %xmm0, %xmm0
1869; AVX2OR512VL-NEXT:    vpsrlw $8, %xmm1, %xmm1
1870; AVX2OR512VL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
1871; AVX2OR512VL-NEXT:    retq
1872;
1873; XOP-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
1874; XOP:       # %bb.0:
1875; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15]
1876; XOP-NEXT:    retq
1877  %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1878  %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1879  %3 = bitcast <8 x i16> %1 to <16 x i8>
1880  %4 = bitcast <8 x i16> %2 to <16 x i8>
1881  %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1882  ret <16 x i8> %5
1883}
1884
; Three chained two-input shuffles with irregular masks (the last one has a
; poison lane). Only the function label and a final retq are checked, under
; the prefix shared by every RUN line.
1885define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
1886; Nothing interesting to test here. Just make sure we didn't crash.
1887; ALL-LABEL: stress_test2:
1888; ALL:         retq
1889entry:
1890  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
1891  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
1892  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 poison, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>
1893
1894  ret <16 x i8> %s.2.0
1895}
1896
; Shuffle of two constant operands whose selected lanes are all zero: mask
; indices 12..15 pick the four "i8 0" elements of the first operand (its other
; elements are poison and never selected), and indices 16..27 pick from
; %weird_zero, a bitcast of zeroinitializer.
; The expectations show the shuffle folding to a zero vector: one xorps-zeroed
; register is stored to both %ptr1 and %ptr2.
1897define void @constant_gets_selected(ptr %ptr1, ptr %ptr2) {
1898; SSE-LABEL: constant_gets_selected:
1899; SSE:       # %bb.0: # %entry
1900; SSE-NEXT:    xorps %xmm0, %xmm0
1901; SSE-NEXT:    movaps %xmm0, (%rdi)
1902; SSE-NEXT:    movaps %xmm0, (%rsi)
1903; SSE-NEXT:    retq
1904;
1905; AVX-LABEL: constant_gets_selected:
1906; AVX:       # %bb.0: # %entry
1907; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1908; AVX-NEXT:    vmovaps %xmm0, (%rdi)
1909; AVX-NEXT:    vmovaps %xmm0, (%rsi)
1910; AVX-NEXT:    retq
1911entry:
1912  %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
1913  %shuffle.i = shufflevector <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
1914  %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
1915  store <4 x i32> %weirder_zero, ptr %ptr1, align 16
1916  store <4 x i32> zeroinitializer, ptr %ptr2, align 16
1917  ret void
1918}
1919
1920;
1921; Shuffle to logical bit shifts
1922;
1923
; Within every 2-byte group the low byte reads the zero operand (index 16) and
; the high byte takes the group's original low byte from %a. The expectations
; show this as a left shift of each 16-bit element by 8 ((v)psllw $8).
; The %b parameter is not referenced by the body.
1924define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
1925; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1926; SSE:       # %bb.0:
1927; SSE-NEXT:    psllw $8, %xmm0
1928; SSE-NEXT:    retq
1929;
1930; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1931; AVX:       # %bb.0:
1932; AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
1933; AVX-NEXT:    retq
1934  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
1935  ret <16 x i8> %shuffle
1936}
1937
; Within every 4-byte group the three low bytes read the zero operand (index
; 16) and the top byte takes the group's original lowest byte from %a. The
; expectations show a left shift of each 32-bit element by 24 ((v)pslld $24).
; The %b parameter is not referenced by the body.
1938define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
1939; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1940; SSE:       # %bb.0:
1941; SSE-NEXT:    pslld $24, %xmm0
1942; SSE-NEXT:    retq
1943;
1944; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1945; AVX:       # %bb.0:
1946; AVX-NEXT:    vpslld $24, %xmm0, %xmm0
1947; AVX-NEXT:    retq
1948  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
1949  ret <16 x i8> %shuffle
1950}
1951
; Within each 8-byte half the seven low bytes read the zero operand (index 16)
; and the top byte takes the half's original lowest byte from %a. The
; expectations show a left shift of each 64-bit element by 56 ((v)psllq $56).
; The %b parameter is not referenced by the body.
1952define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
1953; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1954; SSE:       # %bb.0:
1955; SSE-NEXT:    psllq $56, %xmm0
1956; SSE-NEXT:    retq
1957;
1958; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1959; AVX:       # %bb.0:
1960; AVX-NEXT:    vpsllq $56, %xmm0, %xmm0
1961; AVX-NEXT:    retq
1962  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
1963  ret <16 x i8> %shuffle
1964}
1965
; Each 8-byte half starts with a zero byte (index 16) followed by that half's
; original bytes moved up by one position; a few lanes are poison in the mask.
; The expectations show a left shift of each 64-bit element by 8
; ((v)psllq $8), which also supplies values for the poison lanes.
; The %b parameter is not referenced by the body.
1966define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
1967; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1968; SSE:       # %bb.0:
1969; SSE-NEXT:    psllq $8, %xmm0
1970; SSE-NEXT:    retq
1971;
1972; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1973; AVX:       # %bb.0:
1974; AVX-NEXT:    vpsllq $8, %xmm0, %xmm0
1975; AVX-NEXT:    retq
1976  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 poison, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 16, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14>
1977  ret <16 x i8> %shuffle
1978}
1979
; The defined lanes put the high byte of a 2-byte group into its low byte
; (indices 1, 11, 13, 15) and read the zero operand (index 16) for high bytes;
; the remaining lanes are poison. The expectations show a right shift of each
; 16-bit element by 8 ((v)psrlw $8).
; The %b parameter is not referenced by the body.
1980define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
1981; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1982; SSE:       # %bb.0:
1983; SSE-NEXT:    psrlw $8, %xmm0
1984; SSE-NEXT:    retq
1985;
1986; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1987; AVX:       # %bb.0:
1988; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
1989; AVX-NEXT:    retq
1990  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 poison, i32 16, i32 poison, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
1991  ret <16 x i8> %shuffle
1992}
1993
; The defined lanes move the upper two bytes of a 4-byte group into its lower
; two bytes and read the zero operand (index 16) for the upper two; the middle
; lanes are poison. The expectations show a right shift of each 32-bit element
; by 16 ((v)psrld $16).
; The %b parameter is not referenced by the body.
1994define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
1995; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
1996; SSE:       # %bb.0:
1997; SSE-NEXT:    psrld $16, %xmm0
1998; SSE-NEXT:    retq
1999;
2000; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
2001; AVX:       # %bb.0:
2002; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
2003; AVX-NEXT:    retq
2004  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 16, i32 16>
2005  ret <16 x i8> %shuffle
2006}
2007
; The defined lanes put the top byte of each 8-byte half (indices 7 and 15)
; into that half's lowest byte and read the zero operand (index 16) elsewhere;
; the remaining lanes are poison. The expectations show a right shift of each
; 64-bit element by 56 ((v)psrlq $56).
; The %b parameter is not referenced by the body.
2008define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
2009; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
2010; SSE:       # %bb.0:
2011; SSE-NEXT:    psrlq $56, %xmm0
2012; SSE-NEXT:    retq
2013;
2014; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
2015; AVX:       # %bb.0:
2016; AVX-NEXT:    vpsrlq $56, %xmm0, %xmm0
2017; AVX-NEXT:    retq
2018  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16>
2019  ret <16 x i8> %shuffle
2020}
2021
; Places bytes 1..6 of %a at result positions 4..9 and zero everywhere else.
; The zeros come from mask index 16, i.e. element 0 of the second operand,
; which is the only non-poison element of that constant (i8 0).
; SSE2 expects three whole-register byte shifts (pslldq, psrldq, pslldq);
; SSSE3 and later expect a single (v)pshufb with zeroing lanes.
2022define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
2023; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
2024; SSE2:       # %bb.0:
2025; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
2026; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2027; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
2028; SSE2-NEXT:    retq
2029;
2030; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
2031; SSSE3:       # %bb.0:
2032; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero
2033; SSSE3-NEXT:    retq
2034;
2035; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
2036; SSE41:       # %bb.0:
2037; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero
2038; SSE41-NEXT:    retq
2039;
2040; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
2041; AVX:       # %bb.0:
2042; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero
2043; AVX-NEXT:    retq
2044  %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2045  ret <16 x i8> %shuffle
2046}
2047
; Places bytes 1..6 of %a at result positions 0..5 and zero in the remaining
; ten lanes (mask index 16 selects the i8 0 in element 0 of the constant
; second operand).
; SSE, AVX1, the slow-shuffle AVX2 configuration and XOP expect a byte-shift
; pair ((v)pslldq then (v)psrldq); the fast-variable-shuffle AVX2
; configuration and AVX512VL expect a single vpshufb.
2048define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
2049; SSE-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2050; SSE:       # %bb.0:
2051; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
2052; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2053; SSE-NEXT:    retq
2054;
2055; AVX1-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2056; AVX1:       # %bb.0:
2057; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
2058; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2059; AVX1-NEXT:    retq
2060;
2061; AVX2-SLOW-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2062; AVX2-SLOW:       # %bb.0:
2063; AVX2-SLOW-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
2064; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2065; AVX2-SLOW-NEXT:    retq
2066;
2067; AVX2-FAST-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2068; AVX2-FAST:       # %bb.0:
2069; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2070; AVX2-FAST-NEXT:    retq
2071;
2072; AVX512VL-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2073; AVX512VL:       # %bb.0:
2074; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2075; AVX512VL-NEXT:    retq
2076;
2077; XOP-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
2078; XOP:       # %bb.0:
2079; XOP-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
2080; XOP-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2081; XOP-NEXT:    retq
2082  %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2083  ret <16 x i8> %shuffle
2084}
2085
; Places bytes 1..6 of %a at result positions 10..15 and zero in the first ten
; lanes (mask index 16 selects the i8 0 in element 0 of the constant second
; operand).
; SSE, AVX1, the slow-shuffle AVX2 configuration and XOP expect a 64-bit right
; shift by 8 followed by a whole-register byte shift ((v)psrlq + (v)pslldq);
; the fast-variable-shuffle AVX2 configuration and AVX512VL expect a single
; vpshufb.
2086define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06(<16 x i8> %a) {
2087; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2088; SSE:       # %bb.0:
2089; SSE-NEXT:    psrlq $8, %xmm0
2090; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2091; SSE-NEXT:    retq
2092;
2093; AVX1-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2094; AVX1:       # %bb.0:
2095; AVX1-NEXT:    vpsrlq $8, %xmm0, %xmm0
2096; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2097; AVX1-NEXT:    retq
2098;
2099; AVX2-SLOW-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2100; AVX2-SLOW:       # %bb.0:
2101; AVX2-SLOW-NEXT:    vpsrlq $8, %xmm0, %xmm0
2102; AVX2-SLOW-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2103; AVX2-SLOW-NEXT:    retq
2104;
2105; AVX2-FAST-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2106; AVX2-FAST:       # %bb.0:
2107; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
2108; AVX2-FAST-NEXT:    retq
2109;
2110; AVX512VL-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2111; AVX512VL:       # %bb.0:
2112; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
2113; AVX512VL-NEXT:    retq
2114;
2115; XOP-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
2116; XOP:       # %bb.0:
2117; XOP-NEXT:    vpsrlq $8, %xmm0, %xmm0
2118; XOP-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
2119; XOP-NEXT:    retq
2120  %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
2121  ret <16 x i8> %shuffle
2122}
2123
; Single-input byte rotation inside every 4-byte group: result byte 4k is
; source byte 4k+3, followed by source bytes 4k, 4k+1, 4k+2.
; Expected lowerings: SSE2 emulates the rotate with psrld $24 / pslld $8 / por;
; SSSE3 through AVX2 use one (v)pshufb; AVX512VL uses vprold $8 and XOP uses
; vprotd $8.
2124define <16 x i8> @shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i8> %a) {
2125; SSE2-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2126; SSE2:       # %bb.0:
2127; SSE2-NEXT:    movdqa %xmm0, %xmm1
2128; SSE2-NEXT:    psrld $24, %xmm1
2129; SSE2-NEXT:    pslld $8, %xmm0
2130; SSE2-NEXT:    por %xmm1, %xmm0
2131; SSE2-NEXT:    retq
2132;
2133; SSSE3-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2134; SSSE3:       # %bb.0:
2135; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
2136; SSSE3-NEXT:    retq
2137;
2138; SSE41-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2139; SSE41:       # %bb.0:
2140; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
2141; SSE41-NEXT:    retq
2142;
2143; AVX1-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2144; AVX1:       # %bb.0:
2145; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
2146; AVX1-NEXT:    retq
2147;
2148; AVX2-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2149; AVX2:       # %bb.0:
2150; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
2151; AVX2-NEXT:    retq
2152;
2153; AVX512VL-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2154; AVX512VL:       # %bb.0:
2155; AVX512VL-NEXT:    vprold $8, %xmm0, %xmm0
2156; AVX512VL-NEXT:    retq
2157;
2158; XOP-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14:
2159; XOP:       # %bb.0:
2160; XOP-NEXT:    vprotd $8, %xmm0, %xmm0
2161; XOP-NEXT:    retq
2162  %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14>
2163  ret <16 x i8> %shuffle
2164}
2165
2166; PR44379
; Single-input byte rotation inside each 8-byte half: source bytes 2..7 come
; first, then bytes 0..1 (and likewise 10..15, then 8..9).
; Expected lowerings: SSE, AVX1 and the slow-shuffle AVX2 configuration use a
; (v)pshuflw + (v)pshufhw pair; the fast-variable-shuffle AVX2 configuration
; uses one vpshufb; AVX512VL uses vprolq $48 and XOP uses vprotq $48.
2167define <16 x i8> @shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09(<16 x i8> %a) {
2168; SSE-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2169; SSE:       # %bb.0:
2170; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2171; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2172; SSE-NEXT:    retq
2173;
2174; AVX1-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2175; AVX1:       # %bb.0:
2176; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2177; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2178; AVX1-NEXT:    retq
2179;
2180; AVX2-SLOW-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2181; AVX2-SLOW:       # %bb.0:
2182; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
2183; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
2184; AVX2-SLOW-NEXT:    retq
2185;
2186; AVX2-FAST-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2187; AVX2-FAST:       # %bb.0:
2188; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9]
2189; AVX2-FAST-NEXT:    retq
2190;
2191; AVX512VL-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2192; AVX512VL:       # %bb.0:
2193; AVX512VL-NEXT:    vprolq $48, %xmm0, %xmm0
2194; AVX512VL-NEXT:    retq
2195;
2196; XOP-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09:
2197; XOP:       # %bb.0:
2198; XOP-NEXT:    vprotq $48, %xmm0, %xmm0
2199; XOP-NEXT:    retq
2200  %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9>
2201  ret <16 x i8> %shuffle
2202}
2203
; Regression test named after PR12412. The mask keeps the even-numbered bytes
; of %inval1 followed by the even-numbered bytes of %inval2, i.e. the low byte
; of every 2-byte group of both inputs.
; Expected lowerings: SSE through AVX2 mask each input with a splat of 255
; per 16-bit lane and pack with (v)packuswb; AVX512VL joins the inputs into a
; ymm register and truncates with vpmovwb; XOP uses one vpperm.
2204define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
2205; SSE2-LABEL: PR12412:
2206; SSE2:       # %bb.0: # %entry
2207; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2208; SSE2-NEXT:    pand %xmm2, %xmm1
2209; SSE2-NEXT:    pand %xmm2, %xmm0
2210; SSE2-NEXT:    packuswb %xmm1, %xmm0
2211; SSE2-NEXT:    retq
2212;
2213; SSSE3-LABEL: PR12412:
2214; SSSE3:       # %bb.0: # %entry
2215; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2216; SSSE3-NEXT:    pand %xmm2, %xmm1
2217; SSSE3-NEXT:    pand %xmm2, %xmm0
2218; SSSE3-NEXT:    packuswb %xmm1, %xmm0
2219; SSSE3-NEXT:    retq
2220;
2221; SSE41-LABEL: PR12412:
2222; SSE41:       # %bb.0: # %entry
2223; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2224; SSE41-NEXT:    pand %xmm2, %xmm1
2225; SSE41-NEXT:    pand %xmm2, %xmm0
2226; SSE41-NEXT:    packuswb %xmm1, %xmm0
2227; SSE41-NEXT:    retq
2228;
2229; AVX1-LABEL: PR12412:
2230; AVX1:       # %bb.0: # %entry
2231; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2232; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
2233; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
2234; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2235; AVX1-NEXT:    retq
2236;
2237; AVX2-LABEL: PR12412:
2238; AVX2:       # %bb.0: # %entry
2239; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2240; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
2241; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
2242; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2243; AVX2-NEXT:    retq
2244;
2245; AVX512VL-LABEL: PR12412:
2246; AVX512VL:       # %bb.0: # %entry
2247; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2248; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2249; AVX512VL-NEXT:    vpmovwb %ymm0, %xmm0
2250; AVX512VL-NEXT:    vzeroupper
2251; AVX512VL-NEXT:    retq
2252;
2253; XOP-LABEL: PR12412:
2254; XOP:       # %bb.0: # %entry
2255; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
2256; XOP-NEXT:    retq
2257entry:
2258  %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
2259  ret <16 x i8> %0
2260}
2261
; Within every 4-byte group the lowest lane is poison, the next two lanes take
; the group's two upper bytes from %a, and the top lane reads the zero operand
; (index 16). The expectations show a right shift of each 32-bit element by 8
; ((v)psrld $8), which also supplies a value for the poison lanes.
2262define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
2263; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
2264; SSE:       # %bb.0:
2265; SSE-NEXT:    psrld $8, %xmm0
2266; SSE-NEXT:    retq
2267;
2268; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
2269; AVX:       # %bb.0:
2270; AVX-NEXT:    vpsrld $8, %xmm0, %xmm0
2271; AVX-NEXT:    retq
2272  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 poison, i32 2, i32 3, i32 16, i32 poison, i32 6, i32 7, i32 16, i32 poison, i32 10, i32 11, i32 16, i32 poison, i32 14, i32 15, i32 16>
2273  ret <16 x i8> %shuffle
2274}
2275
; Three shuffles chained through bitcasts at different element widths:
;   1. a byte shuffle interleaving the low eight bytes of %a and %b in
;      descending order (a7,b7,a6,b6,...,a0,b0);
;   2. a reversal of the four 32-bit elements;
;   3. a swap of adjacent 16-bit elements.
; Composed, they give a0,b0,a1,b1,...,a7,b7, and the expectations show the
; whole chain folded into a single (v)punpcklbw.
2276define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
2277; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
2278; SSE:       # %bb.0:
2279; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2280; SSE-NEXT:    retq
2281;
2282; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
2283; AVX:       # %bb.0:
2284; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2285; AVX-NEXT:    retq
2286  %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
2287  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
2288  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2289  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
2290  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
2291  %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
2292  ret <16 x i8> %bitcast8
2293}
2294
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; bitcasts to bytes and splats byte 0 across all sixteen lanes (the mask is
; zeroinitializer, so every lane reads element 0).
; Expected lowerings: the AVX2-capable configurations fold the load into
; vpbroadcastb (%rdi); SSSE3, SSE4.1, AVX1 and XOP+AVX1 load with movd and
; splat via pshufb with an all-zero mask; SSE2 loads with movd and splats via
; punpcklbw + pshuflw + pshufd.
2295define <16 x i8> @insert_dup_mem_v16i8_i32(ptr %ptr) {
2296; SSE2-LABEL: insert_dup_mem_v16i8_i32:
2297; SSE2:       # %bb.0:
2298; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2299; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2300; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2301; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2302; SSE2-NEXT:    retq
2303;
2304; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
2305; SSSE3:       # %bb.0:
2306; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2307; SSSE3-NEXT:    pxor %xmm1, %xmm1
2308; SSSE3-NEXT:    pshufb %xmm1, %xmm0
2309; SSSE3-NEXT:    retq
2310;
2311; SSE41-LABEL: insert_dup_mem_v16i8_i32:
2312; SSE41:       # %bb.0:
2313; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2314; SSE41-NEXT:    pxor %xmm1, %xmm1
2315; SSE41-NEXT:    pshufb %xmm1, %xmm0
2316; SSE41-NEXT:    retq
2317;
2318; AVX1-LABEL: insert_dup_mem_v16i8_i32:
2319; AVX1:       # %bb.0:
2320; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2321; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2322; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2323; AVX1-NEXT:    retq
2324;
2325; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_i32:
2326; AVX2OR512VL:       # %bb.0:
2327; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
2328; AVX2OR512VL-NEXT:    retq
2329;
2330; XOPAVX1-LABEL: insert_dup_mem_v16i8_i32:
2331; XOPAVX1:       # %bb.0:
2332; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2333; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2334; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2335; XOPAVX1-NEXT:    retq
2336;
2337; XOPAVX2-LABEL: insert_dup_mem_v16i8_i32:
2338; XOPAVX2:       # %bb.0:
2339; XOPAVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
2340; XOPAVX2-NEXT:    retq
2341  %tmp = load i32, ptr %ptr, align 4
2342  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2343  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
2344  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> zeroinitializer
2345  ret <16 x i8> %tmp3
2346}
2347
; Splat byte 0 of a sign-extended i8 load across all 16 lanes.
; The i8 is loaded, sign-extended to i32, inserted into lane 0 of a zeroed
; <4 x i32>, bitcast to <16 x i8>, and broadcast with an all-zero mask.
; Only byte 0 of the extended value is read by the shuffle, so the
; expectations load with movzbl rather than movsbl, and the AVX2 and later
; targets broadcast the byte straight from (%rdi) with vpbroadcastb.
2348define <16 x i8> @insert_dup_mem_v16i8_sext_i8(ptr %ptr) {
2349; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
2350; SSE2:       # %bb.0:
2351; SSE2-NEXT:    movzbl (%rdi), %eax
2352; SSE2-NEXT:    movd %eax, %xmm0
2353; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2354; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2355; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2356; SSE2-NEXT:    retq
2357;
2358; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
2359; SSSE3:       # %bb.0:
2360; SSSE3-NEXT:    movzbl (%rdi), %eax
2361; SSSE3-NEXT:    movd %eax, %xmm0
2362; SSSE3-NEXT:    pxor %xmm1, %xmm1
2363; SSSE3-NEXT:    pshufb %xmm1, %xmm0
2364; SSSE3-NEXT:    retq
2365;
2366; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
2367; SSE41:       # %bb.0:
2368; SSE41-NEXT:    movzbl (%rdi), %eax
2369; SSE41-NEXT:    movd %eax, %xmm0
2370; SSE41-NEXT:    pxor %xmm1, %xmm1
2371; SSE41-NEXT:    pshufb %xmm1, %xmm0
2372; SSE41-NEXT:    retq
2373;
2374; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
2375; AVX1:       # %bb.0:
2376; AVX1-NEXT:    movzbl (%rdi), %eax
2377; AVX1-NEXT:    vmovd %eax, %xmm0
2378; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2379; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2380; AVX1-NEXT:    retq
2381;
2382; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_sext_i8:
2383; AVX2OR512VL:       # %bb.0:
2384; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
2385; AVX2OR512VL-NEXT:    retq
2386;
2387; XOPAVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
2388; XOPAVX1:       # %bb.0:
2389; XOPAVX1-NEXT:    movzbl (%rdi), %eax
2390; XOPAVX1-NEXT:    vmovd %eax, %xmm0
2391; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2392; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2393; XOPAVX1-NEXT:    retq
2394;
2395; XOPAVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
2396; XOPAVX2:       # %bb.0:
2397; XOPAVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
2398; XOPAVX2-NEXT:    retq
2399  %tmp = load i8, ptr %ptr, align 1
2400  %tmp1 = sext i8 %tmp to i32
2401  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2402  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
2403  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> zeroinitializer
2404  ret <16 x i8> %tmp4
2405}
2406
; Splat byte 1 of an i32 loaded from memory across all 16 lanes.
; Same construction as the byte-0 variant (insert into lane 0 of a zeroed
; <4 x i32>, bitcast to <16 x i8>), but the shuffle mask selects element 1
; for every lane. The expectations for AVX2 and later fold the element
; offset into the address and broadcast from 1(%rdi). Older targets load
; the full i32 with movd and then shuffle.
2407define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(ptr %ptr) {
2408; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
2409; SSE2:       # %bb.0:
2410; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2411; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2412; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2413; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2414; SSE2-NEXT:    retq
2415;
2416; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
2417; SSSE3:       # %bb.0:
2418; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2419; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2420; SSSE3-NEXT:    retq
2421;
2422; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
2423; SSE41:       # %bb.0:
2424; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2425; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2426; SSE41-NEXT:    retq
2427;
2428; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
2429; AVX1:       # %bb.0:
2430; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2431; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2432; AVX1-NEXT:    retq
2433;
2434; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i8_i32:
2435; AVX2OR512VL:       # %bb.0:
2436; AVX2OR512VL-NEXT:    vpbroadcastb 1(%rdi), %xmm0
2437; AVX2OR512VL-NEXT:    retq
2438;
2439; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
2440; XOPAVX1:       # %bb.0:
2441; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2442; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2443; XOPAVX1-NEXT:    retq
2444;
2445; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
2446; XOPAVX2:       # %bb.0:
2447; XOPAVX2-NEXT:    vpbroadcastb 1(%rdi), %xmm0
2448; XOPAVX2-NEXT:    retq
2449  %tmp = load i32, ptr %ptr, align 4
2450  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2451  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
2452  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2453  ret <16 x i8> %tmp3
2454}
2455
; Splat byte 2 of an i32 loaded from memory across all 16 lanes.
; Same construction as the byte-0 and byte-1 variants, with the shuffle mask
; selecting element 2 for every lane. The expectations for AVX2 and later
; broadcast directly from 2(%rdi). Older targets load the full i32 with movd
; and then shuffle.
2456define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(ptr %ptr) {
2457; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
2458; SSE2:       # %bb.0:
2459; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2460; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2461; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
2462; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2463; SSE2-NEXT:    retq
2464;
2465; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
2466; SSSE3:       # %bb.0:
2467; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2468; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2469; SSSE3-NEXT:    retq
2470;
2471; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
2472; SSE41:       # %bb.0:
2473; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2474; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2475; SSE41-NEXT:    retq
2476;
2477; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
2478; AVX1:       # %bb.0:
2479; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2480; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2481; AVX1-NEXT:    retq
2482;
2483; AVX2OR512VL-LABEL: insert_dup_elt2_mem_v16i8_i32:
2484; AVX2OR512VL:       # %bb.0:
2485; AVX2OR512VL-NEXT:    vpbroadcastb 2(%rdi), %xmm0
2486; AVX2OR512VL-NEXT:    retq
2487;
2488; XOPAVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
2489; XOPAVX1:       # %bb.0:
2490; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2491; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2492; XOPAVX1-NEXT:    retq
2493;
2494; XOPAVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
2495; XOPAVX2:       # %bb.0:
2496; XOPAVX2-NEXT:    vpbroadcastb 2(%rdi), %xmm0
2497; XOPAVX2-NEXT:    retq
2498  %tmp = load i32, ptr %ptr, align 4
2499  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2500  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
2501  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
2502  ret <16 x i8> %tmp3
2503}
2504
; Splat byte 1 of a sign-extended i8 load across all 16 lanes.
; Byte 1 of the sext result is produced by the extension, not by the load,
; so this cannot become a plain memory broadcast the way the byte-0 sext
; variant does. Every expectation therefore keeps the movsbl. Targets
; without AVX2 then splat byte 1 with pshufb (or the SSE2 shuffle sequence),
; while AVX2 and later shift the wanted byte down with shrl $8 before a
; register vpbroadcastb.
2505define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(ptr %ptr) {
2506; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2507; SSE2:       # %bb.0:
2508; SSE2-NEXT:    movsbl (%rdi), %eax
2509; SSE2-NEXT:    movd %eax, %xmm0
2510; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2511; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2512; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2513; SSE2-NEXT:    retq
2514;
2515; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2516; SSSE3:       # %bb.0:
2517; SSSE3-NEXT:    movsbl (%rdi), %eax
2518; SSSE3-NEXT:    movd %eax, %xmm0
2519; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2520; SSSE3-NEXT:    retq
2521;
2522; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2523; SSE41:       # %bb.0:
2524; SSE41-NEXT:    movsbl (%rdi), %eax
2525; SSE41-NEXT:    movd %eax, %xmm0
2526; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2527; SSE41-NEXT:    retq
2528;
2529; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2530; AVX1:       # %bb.0:
2531; AVX1-NEXT:    movsbl (%rdi), %eax
2532; AVX1-NEXT:    vmovd %eax, %xmm0
2533; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2534; AVX1-NEXT:    retq
2535;
2536; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2537; AVX2:       # %bb.0:
2538; AVX2-NEXT:    movsbl (%rdi), %eax
2539; AVX2-NEXT:    shrl $8, %eax
2540; AVX2-NEXT:    vmovd %eax, %xmm0
2541; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
2542; AVX2-NEXT:    retq
2543;
2544; AVX512VL-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2545; AVX512VL:       # %bb.0:
2546; AVX512VL-NEXT:    movsbl (%rdi), %eax
2547; AVX512VL-NEXT:    shrl $8, %eax
2548; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
2549; AVX512VL-NEXT:    retq
2550;
2551; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2552; XOPAVX1:       # %bb.0:
2553; XOPAVX1-NEXT:    movsbl (%rdi), %eax
2554; XOPAVX1-NEXT:    vmovd %eax, %xmm0
2555; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2556; XOPAVX1-NEXT:    retq
2557;
2558; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
2559; XOPAVX2:       # %bb.0:
2560; XOPAVX2-NEXT:    movsbl (%rdi), %eax
2561; XOPAVX2-NEXT:    shrl $8, %eax
2562; XOPAVX2-NEXT:    vmovd %eax, %xmm0
2563; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
2564; XOPAVX2-NEXT:    retq
2565  %tmp = load i8, ptr %ptr, align 1
2566  %tmp1 = sext i8 %tmp to i32
2567  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2568  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
2569  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2570  ret <16 x i8> %tmp4
2571}
2572
; Splat byte 2 of a sign-extended i8 load across all 16 lanes.
; As with the byte-1 sext variant, the selected byte comes from the sign
; extension rather than from memory, so every expectation keeps the movsbl.
; Targets without AVX2 splat byte 2 with pshufb (or the SSE2 shuffle
; sequence), while AVX2 and later shift with shrl $16 and then use a
; register vpbroadcastb.
2573define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(ptr %ptr) {
2574; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2575; SSE2:       # %bb.0:
2576; SSE2-NEXT:    movsbl (%rdi), %eax
2577; SSE2-NEXT:    movd %eax, %xmm0
2578; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2579; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
2580; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2581; SSE2-NEXT:    retq
2582;
2583; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2584; SSSE3:       # %bb.0:
2585; SSSE3-NEXT:    movsbl (%rdi), %eax
2586; SSSE3-NEXT:    movd %eax, %xmm0
2587; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2588; SSSE3-NEXT:    retq
2589;
2590; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2591; SSE41:       # %bb.0:
2592; SSE41-NEXT:    movsbl (%rdi), %eax
2593; SSE41-NEXT:    movd %eax, %xmm0
2594; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2595; SSE41-NEXT:    retq
2596;
2597; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2598; AVX1:       # %bb.0:
2599; AVX1-NEXT:    movsbl (%rdi), %eax
2600; AVX1-NEXT:    vmovd %eax, %xmm0
2601; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2602; AVX1-NEXT:    retq
2603;
2604; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2605; AVX2:       # %bb.0:
2606; AVX2-NEXT:    movsbl (%rdi), %eax
2607; AVX2-NEXT:    shrl $16, %eax
2608; AVX2-NEXT:    vmovd %eax, %xmm0
2609; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
2610; AVX2-NEXT:    retq
2611;
2612; AVX512VL-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2613; AVX512VL:       # %bb.0:
2614; AVX512VL-NEXT:    movsbl (%rdi), %eax
2615; AVX512VL-NEXT:    shrl $16, %eax
2616; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
2617; AVX512VL-NEXT:    retq
2618;
2619; XOPAVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2620; XOPAVX1:       # %bb.0:
2621; XOPAVX1-NEXT:    movsbl (%rdi), %eax
2622; XOPAVX1-NEXT:    vmovd %eax, %xmm0
2623; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
2624; XOPAVX1-NEXT:    retq
2625;
2626; XOPAVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
2627; XOPAVX2:       # %bb.0:
2628; XOPAVX2-NEXT:    movsbl (%rdi), %eax
2629; XOPAVX2-NEXT:    shrl $16, %eax
2630; XOPAVX2-NEXT:    vmovd %eax, %xmm0
2631; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
2632; XOPAVX2-NEXT:    retq
2633  %tmp = load i8, ptr %ptr, align 1
2634  %tmp1 = sext i8 %tmp to i32
2635  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2636  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
2637  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
2638  ret <16 x i8> %tmp4
2639}
2640
; Test named after bug report PR31364.
; Two bytes are loaded from %a and %b and inserted into lanes 0 and 1 of a
; vector whose lane 3 is the constant 0 and whose other lanes are poison.
; The final shuffle picks element 1 for most lanes, element 3 (the constant
; zero) for lane 7, and element 0 for the last three lanes. The pshufb
; expectations show that lane 7 as an explicit zero in the shuffle mask.
2641define <16 x i8> @PR31364(ptr nocapture readonly %a, ptr nocapture readonly %b) {
2642; SSE2-LABEL: PR31364:
2643; SSE2:       # %bb.0:
2644; SSE2-NEXT:    movzbl (%rdi), %eax
2645; SSE2-NEXT:    movzbl (%rsi), %ecx
2646; SSE2-NEXT:    shll $8, %ecx
2647; SSE2-NEXT:    orl %eax, %ecx
2648; SSE2-NEXT:    movd %ecx, %xmm1
2649; SSE2-NEXT:    pxor %xmm0, %xmm0
2650; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2651; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7]
2652; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
2653; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2654; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
2655; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
2656; SSE2-NEXT:    packuswb %xmm1, %xmm0
2657; SSE2-NEXT:    retq
2658;
2659; SSSE3-LABEL: PR31364:
2660; SSSE3:       # %bb.0:
2661; SSSE3-NEXT:    movzbl (%rdi), %eax
2662; SSSE3-NEXT:    movzbl (%rsi), %ecx
2663; SSSE3-NEXT:    shll $8, %ecx
2664; SSSE3-NEXT:    orl %eax, %ecx
2665; SSSE3-NEXT:    movd %ecx, %xmm0
2666; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
2667; SSSE3-NEXT:    retq
2668;
2669; SSE41-LABEL: PR31364:
2670; SSE41:       # %bb.0:
2671; SSE41-NEXT:    movzbl (%rdi), %eax
2672; SSE41-NEXT:    movd %eax, %xmm0
2673; SSE41-NEXT:    pinsrb $1, (%rsi), %xmm0
2674; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
2675; SSE41-NEXT:    retq
2676;
2677; AVX-LABEL: PR31364:
2678; AVX:       # %bb.0:
2679; AVX-NEXT:    movzbl (%rdi), %eax
2680; AVX-NEXT:    vmovd %eax, %xmm0
2681; AVX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm0
2682; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
2683; AVX-NEXT:    retq
2684  %v0 = load i8, ptr %a, align 1
2685  %vecins = insertelement <16 x i8> <i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, i8 %v0, i32 0
2686  %v1 = load i8, ptr %b, align 1
2687  %vecins2 = insertelement <16 x i8> %vecins, i8 %v1, i32 1
2688  %result = shufflevector <16 x i8> %vecins2, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 3, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
2689  ret <16 x i8> %result
2690}
2691
; Test named after bug report PR31301.
; One byte is loaded from each of %x and %y. Each byte is splatted into the
; low 8 lanes of its own vector (the high 8 lanes are poison), and the two
; low halves are then interleaved lane by lane, giving x,y,x,y,... in all
; 16 lanes. The expectations for AVX2 and later use two memory-operand
; vpbroadcastb instructions followed by vpunpcklbw, while the XOP with AVX1
; run uses a single vpperm on the two loaded scalars.
2692define <16 x i8> @PR31301(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
2693; SSE2-LABEL: PR31301:
2694; SSE2:       # %bb.0: # %entry
2695; SSE2-NEXT:    movzbl (%rdi), %eax
2696; SSE2-NEXT:    movd %eax, %xmm0
2697; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2698; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2699; SSE2-NEXT:    movzbl (%rsi), %eax
2700; SSE2-NEXT:    movd %eax, %xmm1
2701; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2702; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2703; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2704; SSE2-NEXT:    retq
2705;
2706; SSSE3-LABEL: PR31301:
2707; SSSE3:       # %bb.0: # %entry
2708; SSSE3-NEXT:    movzbl (%rdi), %eax
2709; SSSE3-NEXT:    movd %eax, %xmm0
2710; SSSE3-NEXT:    pxor %xmm1, %xmm1
2711; SSSE3-NEXT:    pshufb %xmm1, %xmm0
2712; SSSE3-NEXT:    movzbl (%rsi), %eax
2713; SSSE3-NEXT:    movd %eax, %xmm2
2714; SSSE3-NEXT:    pshufb %xmm1, %xmm2
2715; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2716; SSSE3-NEXT:    retq
2717;
2718; SSE41-LABEL: PR31301:
2719; SSE41:       # %bb.0: # %entry
2720; SSE41-NEXT:    movzbl (%rdi), %eax
2721; SSE41-NEXT:    movd %eax, %xmm0
2722; SSE41-NEXT:    pxor %xmm1, %xmm1
2723; SSE41-NEXT:    pshufb %xmm1, %xmm0
2724; SSE41-NEXT:    movzbl (%rsi), %eax
2725; SSE41-NEXT:    movd %eax, %xmm2
2726; SSE41-NEXT:    pshufb %xmm1, %xmm2
2727; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2728; SSE41-NEXT:    retq
2729;
2730; AVX1-LABEL: PR31301:
2731; AVX1:       # %bb.0: # %entry
2732; AVX1-NEXT:    movzbl (%rdi), %eax
2733; AVX1-NEXT:    vmovd %eax, %xmm0
2734; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2735; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2736; AVX1-NEXT:    movzbl (%rsi), %eax
2737; AVX1-NEXT:    vmovd %eax, %xmm2
2738; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
2739; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2740; AVX1-NEXT:    retq
2741;
2742; AVX2OR512VL-LABEL: PR31301:
2743; AVX2OR512VL:       # %bb.0: # %entry
2744; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
2745; AVX2OR512VL-NEXT:    vpbroadcastb (%rsi), %xmm1
2746; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2747; AVX2OR512VL-NEXT:    retq
2748;
2749; XOPAVX1-LABEL: PR31301:
2750; XOPAVX1:       # %bb.0: # %entry
2751; XOPAVX1-NEXT:    movzbl (%rdi), %eax
2752; XOPAVX1-NEXT:    vmovd %eax, %xmm0
2753; XOPAVX1-NEXT:    movzbl (%rsi), %eax
2754; XOPAVX1-NEXT:    vmovd %eax, %xmm1
2755; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0]
2756; XOPAVX1-NEXT:    retq
2757;
2758; XOPAVX2-LABEL: PR31301:
2759; XOPAVX2:       # %bb.0: # %entry
2760; XOPAVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
2761; XOPAVX2-NEXT:    vpbroadcastb (%rsi), %xmm1
2762; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2763; XOPAVX2-NEXT:    retq
2764entry:
2765  %0 = load i8, ptr %x, align 1
2766  %1 = insertelement <16 x i8> poison, i8 %0, i32 0
2767  %lane = shufflevector <16 x i8> %1, <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2768  %2 = load i8, ptr %y, align 1
2769  %3 = insertelement <16 x i8> poison, i8 %2, i32 0
2770  %lane3 = shufflevector <16 x i8> %3, <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2771  %vzip.i = shufflevector <16 x i8> %lane, <16 x i8> %lane3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
2772  ret <16 x i8> %vzip.i
2773}
2774
; Test named after bug report PR104482.
; The first shuffle interleaves bytes 8..15 of %i with constant zero bytes,
; which after the bitcast gives eight i16 words, each holding one of those
; bytes zero-extended. Three i16 shuffles follow (swap the two 4-word halves,
; reverse the low four words, reverse the high four words). Composed, they
; reverse the order of all eight words. The SSSE3 and later expectations do
; the whole chain in one pshufb that reads bytes 15 down to 8 with a zero
; after each, while plain SSE2 keeps a multi-instruction sequence.
2775define <8 x i16> @PR104482(<16 x i8> %i) {
2776; SSE2-LABEL: PR104482:
2777; SSE2:       # %bb.0:
2778; SSE2-NEXT:    pxor %xmm1, %xmm1
2779; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2780; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2781; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
2782; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
2783; SSE2-NEXT:    packuswb %xmm0, %xmm0
2784; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2785; SSE2-NEXT:    retq
2786;
2787; SSSE3-LABEL: PR104482:
2788; SSSE3:       # %bb.0:
2789; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
2790; SSSE3-NEXT:    retq
2791;
2792; SSE41-LABEL: PR104482:
2793; SSE41:       # %bb.0:
2794; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
2795; SSE41-NEXT:    retq
2796;
2797; AVX-LABEL: PR104482:
2798; AVX:       # %bb.0:
2799; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
2800; AVX-NEXT:    retq
2801  %i7 = shufflevector <16 x i8> %i, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2802  %1 = bitcast <16 x i8> %i7 to <8 x i16>
2803  %i10 = shufflevector <8 x i16> %1, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
2804  %i11 = shufflevector <8 x i16> %i10, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
2805  %i12 = shufflevector <8 x i16> %i11, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
2806  ret <8 x i16> %i12
2807}
2808