xref: /llvm-project/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll (revision 0c8efbe3a0bd8a46a27835cfefefdc05c4d679a5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-SLOW
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-SLOW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-ALL
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-PERLANE
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
14
; Full broadcast: the mask is all zeros, so each of the 32 result bytes is
; byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - configurations with AVX2 or AVX512VL emit a single vpbroadcastb;
;  - AVX-only configurations (with or without XOP) zero xmm1, use it as the
;    vpshufb control to splat byte 0 inside xmm0, then copy that 128-bit half
;    into the upper half with vinsertf128.
15define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
16; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
17; AVX1:       # %bb.0:
18; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
19; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
20; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
21; AVX1-NEXT:    retq
22;
23; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
24; AVX2OR512VL:       # %bb.0:
25; AVX2OR512VL-NEXT:    vpbroadcastb %xmm0, %ymm0
26; AVX2OR512VL-NEXT:    retq
27;
28; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
29; XOPAVX1:       # %bb.0:
30; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
32; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
33; XOPAVX1-NEXT:    retq
34;
35; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
36; XOPAVX2:       # %bb.0:
37; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
38; XOPAVX2-NEXT:    retq
39  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
40  ret <32 x i8> %shuffle
41}
42
; Near-broadcast: result byte 30 takes byte 1 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations place the odd byte in the second quadword
;    of xmm0 with vpshufb, then vpermq [0,0,0,1] moves that quadword to the top
;    of ymm0 and fills the rest with quadword 0;
;  - the VBMI run with fast cross-lane shuffles materializes a byte-index vector
;    in ymm1 and performs the whole shuffle with one vpermb.
43define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
44; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
45; AVX1:       # %bb.0:
46; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
47; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
48; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
49; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
50; AVX1-NEXT:    retq
51;
52; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
53; AVX2:       # %bb.0:
54; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
55; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
56; AVX2-NEXT:    retq
57;
58; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
59; AVX512VLBW:       # %bb.0:
60; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
61; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
62; AVX512VLBW-NEXT:    retq
63;
64; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
65; AVX512VLVBMI-SLOW:       # %bb.0:
66; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
67; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
68; AVX512VLVBMI-SLOW-NEXT:    retq
69;
70; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
71; AVX512VLVBMI-FAST-ALL:       # %bb.0:
72; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]
73; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
74; AVX512VLVBMI-FAST-ALL-NEXT:    retq
75;
76; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
77; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
78; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
79; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
80; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
81;
82; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
83; XOPAVX1:       # %bb.0:
84; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
85; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
86; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
87; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
88; XOPAVX1-NEXT:    retq
89;
90; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
91; XOPAVX2:       # %bb.0:
92; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
93; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
94; XOPAVX2-NEXT:    retq
95  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
96  ret <32 x i8> %shuffle
97}
98
; Near-broadcast: result byte 29 takes byte 2 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (dword value 512 = 0x0200, i.e. index 2 in the second byte of the last
;    dword) and uses one vpermb.
99define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
100; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
101; AVX1:       # %bb.0:
102; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
103; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
104; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
105; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
106; AVX1-NEXT:    retq
107;
108; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
109; AVX2:       # %bb.0:
110; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
111; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
112; AVX2-NEXT:    retq
113;
114; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
115; AVX512VLBW:       # %bb.0:
116; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
117; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
118; AVX512VLBW-NEXT:    retq
119;
120; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
121; AVX512VLVBMI-SLOW:       # %bb.0:
122; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
123; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
124; AVX512VLVBMI-SLOW-NEXT:    retq
125;
126; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
127; AVX512VLVBMI-FAST-ALL:       # %bb.0:
128; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwd {{.*#+}} ymm1 = [0,0,0,0,0,0,0,512]
129; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
130; AVX512VLVBMI-FAST-ALL-NEXT:    retq
131;
132; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
133; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
134; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
135; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
136; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
137;
138; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
139; XOPAVX1:       # %bb.0:
140; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
141; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
142; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
143; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
144; XOPAVX1-NEXT:    retq
145;
146; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
147; XOPAVX2:       # %bb.0:
148; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
149; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
150; XOPAVX2-NEXT:    retq
151  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
152  ret <32 x i8> %shuffle
153}
154
; Near-broadcast: result byte 28 takes byte 3 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (last dword = 3, so index 3 sits in byte 28) and uses one vpermb.
155define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
156; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
157; AVX1:       # %bb.0:
158; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
159; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
160; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
161; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
162; AVX1-NEXT:    retq
163;
164; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
165; AVX2:       # %bb.0:
166; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
167; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
168; AVX2-NEXT:    retq
169;
170; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
171; AVX512VLBW:       # %bb.0:
172; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
173; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
174; AVX512VLBW-NEXT:    retq
175;
176; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
177; AVX512VLVBMI-SLOW:       # %bb.0:
178; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
179; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
180; AVX512VLVBMI-SLOW-NEXT:    retq
181;
182; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
183; AVX512VLVBMI-FAST-ALL:       # %bb.0:
184; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,0,0,0,0,0,0,3]
185; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
186; AVX512VLVBMI-FAST-ALL-NEXT:    retq
187;
188; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
189; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
190; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
191; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
192; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
193;
194; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
195; XOPAVX1:       # %bb.0:
196; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
197; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
198; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
199; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
200; XOPAVX1-NEXT:    retq
201;
202; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
203; XOPAVX2:       # %bb.0:
204; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
205; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
206; XOPAVX2-NEXT:    retq
207  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
208  ret <32 x i8> %shuffle
209}
210
; Near-broadcast: result byte 27 takes byte 4 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (last qword = 67108864 = 0x04000000, i.e. index 4 in its fourth byte) and
;    uses one vpermb.
211define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
212; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
213; AVX1:       # %bb.0:
214; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
215; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
216; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
217; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
218; AVX1-NEXT:    retq
219;
220; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
221; AVX2:       # %bb.0:
222; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
223; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
224; AVX2-NEXT:    retq
225;
226; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
227; AVX512VLBW:       # %bb.0:
228; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
229; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
230; AVX512VLBW-NEXT:    retq
231;
232; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
233; AVX512VLVBMI-SLOW:       # %bb.0:
234; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
235; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
236; AVX512VLVBMI-SLOW-NEXT:    retq
237;
238; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
239; AVX512VLVBMI-FAST-ALL:       # %bb.0:
240; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxdq {{.*#+}} ymm1 = [0,0,0,67108864]
241; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
242; AVX512VLVBMI-FAST-ALL-NEXT:    retq
243;
244; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
245; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
246; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
247; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
248; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
249;
250; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
251; XOPAVX1:       # %bb.0:
252; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
253; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
254; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
255; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
256; XOPAVX1-NEXT:    retq
257;
258; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
259; XOPAVX2:       # %bb.0:
260; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
261; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
262; XOPAVX2-NEXT:    retq
263  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
264  ret <32 x i8> %shuffle
265}
266
; Near-broadcast: result byte 26 takes byte 5 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (word 13 = 5, so index 5 sits in byte 26) and uses one vpermb.
267define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
268; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
269; AVX1:       # %bb.0:
270; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
271; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
272; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
273; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
274; AVX1-NEXT:    retq
275;
276; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
277; AVX2:       # %bb.0:
278; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
279; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
280; AVX2-NEXT:    retq
281;
282; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
283; AVX512VLBW:       # %bb.0:
284; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
285; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
286; AVX512VLBW-NEXT:    retq
287;
288; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
289; AVX512VLVBMI-SLOW:       # %bb.0:
290; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
291; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
292; AVX512VLVBMI-SLOW-NEXT:    retq
293;
294; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
295; AVX512VLVBMI-FAST-ALL:       # %bb.0:
296; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0]
297; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
298; AVX512VLVBMI-FAST-ALL-NEXT:    retq
299;
300; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
301; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
302; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
303; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
304; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
305;
306; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
307; XOPAVX1:       # %bb.0:
308; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
309; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
310; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
311; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
312; XOPAVX1-NEXT:    retq
313;
314; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
315; XOPAVX2:       # %bb.0:
316; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
317; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
318; XOPAVX2-NEXT:    retq
319  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
320  ret <32 x i8> %shuffle
321}
322
; Near-broadcast: result byte 25 takes byte 6 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (last qword = 1536 = 0x0600, i.e. index 6 in its second byte) and uses one
;    vpermb.
323define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
324; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
325; AVX1:       # %bb.0:
326; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
327; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
328; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
329; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
330; AVX1-NEXT:    retq
331;
332; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
333; AVX2:       # %bb.0:
334; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
335; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
336; AVX2-NEXT:    retq
337;
338; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
339; AVX512VLBW:       # %bb.0:
340; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
341; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
342; AVX512VLBW-NEXT:    retq
343;
344; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
345; AVX512VLVBMI-SLOW:       # %bb.0:
346; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
347; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
348; AVX512VLVBMI-SLOW-NEXT:    retq
349;
350; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
351; AVX512VLVBMI-FAST-ALL:       # %bb.0:
352; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwq {{.*#+}} ymm1 = [0,0,0,1536]
353; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
354; AVX512VLVBMI-FAST-ALL-NEXT:    retq
355;
356; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
357; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
358; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
359; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
360; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
361;
362; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
363; XOPAVX1:       # %bb.0:
364; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
365; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
366; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
367; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
368; XOPAVX1-NEXT:    retq
369;
370; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
371; XOPAVX2:       # %bb.0:
372; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
373; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
374; XOPAVX2-NEXT:    retq
375  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
376  ret <32 x i8> %shuffle
377}
378
; Near-broadcast: result byte 24 takes byte 7 of %a, every other result byte
; takes byte 0 of %a; %b is never selected.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations use vpshufb on xmm0 followed by
;    vpermq [0,0,0,1] to move the second quadword to the top of ymm0;
;  - the VBMI run with fast cross-lane shuffles builds the byte indices in ymm1
;    (last qword = 7, so index 7 sits in byte 24) and uses one vpermb.
379define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
380; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
381; AVX1:       # %bb.0:
382; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
383; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
384; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
385; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
386; AVX1-NEXT:    retq
387;
388; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
389; AVX2:       # %bb.0:
390; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
391; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
392; AVX2-NEXT:    retq
393;
394; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
395; AVX512VLBW:       # %bb.0:
396; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
397; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
398; AVX512VLBW-NEXT:    retq
399;
400; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
401; AVX512VLVBMI-SLOW:       # %bb.0:
402; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
403; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
404; AVX512VLVBMI-SLOW-NEXT:    retq
405;
406; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
407; AVX512VLVBMI-FAST-ALL:       # %bb.0:
408; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,0,0,7]
409; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
410; AVX512VLVBMI-FAST-ALL-NEXT:    retq
411;
412; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
413; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
414; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
415; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
416; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
417;
418; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
419; XOPAVX1:       # %bb.0:
420; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
421; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
422; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
423; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
424; XOPAVX1-NEXT:    retq
425;
426; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
427; XOPAVX2:       # %bb.0:
428; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
429; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
430; XOPAVX2-NEXT:    retq
431  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
432  ret <32 x i8> %shuffle
433}
434
; Near-broadcast: result byte 23 takes byte 8 of %a, every other result byte
; takes byte 0 of %a; %b is never selected. Unlike the preceding cases, the odd
; byte lands in the third quadword of the result rather than the fourth.
; Expected code (autogenerated checks below, regenerate instead of hand-editing):
;  - AVX-only configurations (with or without XOP) shuffle the two 128-bit
;    halves separately with vpshufb and join them with vinsertf128;
;  - most AVX2/AVX512 configurations put the odd byte at the end of the second
;    quadword of xmm0 with vpshufb, then vpermq [0,0,1,0] places that quadword
;    third and fills the rest with quadword 0;
;  - the VBMI run with fast cross-lane shuffles loads a full 32-entry byte-index
;    vector with vmovdqa and uses one vpermb.
435define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
436; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
437; AVX1:       # %bb.0:
438; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
439; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
440; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
441; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
442; AVX1-NEXT:    retq
443;
444; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
445; AVX2:       # %bb.0:
446; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
447; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
448; AVX2-NEXT:    retq
449;
450; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
451; AVX512VLBW:       # %bb.0:
452; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
453; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
454; AVX512VLBW-NEXT:    retq
455;
456; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
457; AVX512VLVBMI-SLOW:       # %bb.0:
458; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
459; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
460; AVX512VLVBMI-SLOW-NEXT:    retq
461;
462; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
463; AVX512VLVBMI-FAST-ALL:       # %bb.0:
464; AVX512VLVBMI-FAST-ALL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
465; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
466; AVX512VLVBMI-FAST-ALL-NEXT:    retq
467;
468; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
469; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
470; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
471; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
472; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
473;
474; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
475; XOPAVX1:       # %bb.0:
476; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
477; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
478; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
479; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
480; XOPAVX1-NEXT:    retq
481;
482; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
483; XOPAVX2:       # %bb.0:
484; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
485; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
486; XOPAVX2-NEXT:    retq
487  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
488  ret <32 x i8> %shuffle
489}
490
; Shuffle test. Every result byte is byte 0 of %a, except result byte 22 which takes byte 9 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
491define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
492; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
493; AVX1:       # %bb.0:
494; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
495; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
496; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
497; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
498; AVX1-NEXT:    retq
499;
500; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
501; AVX2:       # %bb.0:
502; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
503; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
504; AVX2-NEXT:    retq
505;
506; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
507; AVX512VLBW:       # %bb.0:
508; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
509; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
510; AVX512VLBW-NEXT:    retq
511;
512; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
513; AVX512VLVBMI-SLOW:       # %bb.0:
514; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
515; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
516; AVX512VLVBMI-SLOW-NEXT:    retq
517;
518; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
519; AVX512VLVBMI-FAST-ALL:       # %bb.0:
520; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0]
521; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
522; AVX512VLVBMI-FAST-ALL-NEXT:    retq
523;
524; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
525; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
526; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
527; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
528; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
529;
530; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
531; XOPAVX1:       # %bb.0:
532; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
533; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
534; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
535; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
536; XOPAVX1-NEXT:    retq
537;
538; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
539; XOPAVX2:       # %bb.0:
540; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
541; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
542; XOPAVX2-NEXT:    retq
543  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
544  ret <32 x i8> %shuffle
545}
546
; Shuffle test. Every result byte is byte 0 of %a, except result byte 21 which takes byte 10 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
547define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
548; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
549; AVX1:       # %bb.0:
550; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
551; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
552; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
553; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
554; AVX1-NEXT:    retq
555;
556; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
557; AVX2:       # %bb.0:
558; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
559; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
560; AVX2-NEXT:    retq
561;
562; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
563; AVX512VLBW:       # %bb.0:
564; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
565; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
566; AVX512VLBW-NEXT:    retq
567;
568; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
569; AVX512VLVBMI-SLOW:       # %bb.0:
570; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
571; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
572; AVX512VLVBMI-SLOW-NEXT:    retq
573;
574; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
575; AVX512VLVBMI-FAST-ALL:       # %bb.0:
576; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwd {{.*#+}} ymm1 = [0,0,0,0,0,2560,0,0]
577; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
578; AVX512VLVBMI-FAST-ALL-NEXT:    retq
579;
580; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
581; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
582; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
583; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
584; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
585;
586; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
587; XOPAVX1:       # %bb.0:
588; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
589; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
590; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
591; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
592; XOPAVX1-NEXT:    retq
593;
594; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
595; XOPAVX2:       # %bb.0:
596; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
597; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
598; XOPAVX2-NEXT:    retq
599  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
600  ret <32 x i8> %shuffle
601}
602
; Shuffle test. Every result byte is byte 0 of %a, except result byte 20 which takes byte 11 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
603define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
604; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
605; AVX1:       # %bb.0:
606; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
607; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
608; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
609; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
610; AVX1-NEXT:    retq
611;
612; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
613; AVX2:       # %bb.0:
614; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
615; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
616; AVX2-NEXT:    retq
617;
618; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
619; AVX512VLBW:       # %bb.0:
620; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
621; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
622; AVX512VLBW-NEXT:    retq
623;
624; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
625; AVX512VLVBMI-SLOW:       # %bb.0:
626; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
627; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
628; AVX512VLVBMI-SLOW-NEXT:    retq
629;
630; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
631; AVX512VLVBMI-FAST-ALL:       # %bb.0:
632; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,0,0,0,0,11,0,0]
633; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
634; AVX512VLVBMI-FAST-ALL-NEXT:    retq
635;
636; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
637; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
638; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
639; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
640; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
641;
642; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
643; XOPAVX1:       # %bb.0:
644; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
645; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
646; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
647; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
648; XOPAVX1-NEXT:    retq
649;
650; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
651; XOPAVX2:       # %bb.0:
652; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
653; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
654; XOPAVX2-NEXT:    retq
655  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
656  ret <32 x i8> %shuffle
657}
658
; Shuffle test. Every result byte is byte 0 of %a, except result byte 19 which takes byte 12 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
659define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
660; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
661; AVX1:       # %bb.0:
662; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
663; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
664; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
665; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
666; AVX1-NEXT:    retq
667;
668; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
669; AVX2:       # %bb.0:
670; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
671; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
672; AVX2-NEXT:    retq
673;
674; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
675; AVX512VLBW:       # %bb.0:
676; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
677; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
678; AVX512VLBW-NEXT:    retq
679;
680; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
681; AVX512VLVBMI-SLOW:       # %bb.0:
682; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
683; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
684; AVX512VLVBMI-SLOW-NEXT:    retq
685;
686; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
687; AVX512VLVBMI-FAST-ALL:       # %bb.0:
688; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxdq {{.*#+}} ymm1 = [0,0,201326592,0]
689; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
690; AVX512VLVBMI-FAST-ALL-NEXT:    retq
691;
692; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
693; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
694; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
695; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
696; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
697;
698; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
699; XOPAVX1:       # %bb.0:
700; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
701; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
702; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
703; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
704; XOPAVX1-NEXT:    retq
705;
706; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
707; XOPAVX2:       # %bb.0:
708; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
709; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
710; XOPAVX2-NEXT:    retq
711  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
712  ret <32 x i8> %shuffle
713}
714
; Shuffle test. Every result byte is byte 0 of %a, except result byte 18 which takes byte 13 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
715define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
716; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
717; AVX1:       # %bb.0:
718; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
719; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
720; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
721; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
722; AVX1-NEXT:    retq
723;
724; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
725; AVX2:       # %bb.0:
726; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
727; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
728; AVX2-NEXT:    retq
729;
730; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
731; AVX512VLBW:       # %bb.0:
732; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
733; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
734; AVX512VLBW-NEXT:    retq
735;
736; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
737; AVX512VLVBMI-SLOW:       # %bb.0:
738; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
739; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
740; AVX512VLVBMI-SLOW-NEXT:    retq
741;
742; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
743; AVX512VLVBMI-FAST-ALL:       # %bb.0:
744; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0]
745; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
746; AVX512VLVBMI-FAST-ALL-NEXT:    retq
747;
748; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
749; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
750; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
751; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
752; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
753;
754; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
755; XOPAVX1:       # %bb.0:
756; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
757; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
758; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
759; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
760; XOPAVX1-NEXT:    retq
761;
762; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
763; XOPAVX2:       # %bb.0:
764; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
765; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
766; XOPAVX2-NEXT:    retq
767  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
768  ret <32 x i8> %shuffle
769}
770
; Shuffle test. Every result byte is byte 0 of %a, except result byte 17 which takes byte 14 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
771define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
772; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
773; AVX1:       # %bb.0:
774; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
775; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
776; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
777; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
778; AVX1-NEXT:    retq
779;
780; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
781; AVX2:       # %bb.0:
782; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
783; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
784; AVX2-NEXT:    retq
785;
786; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
787; AVX512VLBW:       # %bb.0:
788; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
789; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
790; AVX512VLBW-NEXT:    retq
791;
792; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
793; AVX512VLVBMI-SLOW:       # %bb.0:
794; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
795; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
796; AVX512VLVBMI-SLOW-NEXT:    retq
797;
798; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
799; AVX512VLVBMI-FAST-ALL:       # %bb.0:
800; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwq {{.*#+}} ymm1 = [0,0,3584,0]
801; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
802; AVX512VLVBMI-FAST-ALL-NEXT:    retq
803;
804; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
805; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
806; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
807; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
808; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
809;
810; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
811; XOPAVX1:       # %bb.0:
812; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
813; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
814; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
815; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
816; XOPAVX1-NEXT:    retq
817;
818; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
819; XOPAVX2:       # %bb.0:
820; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
821; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
822; XOPAVX2-NEXT:    retq
823  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
824  ret <32 x i8> %shuffle
825}
826
; Shuffle test. Every result byte is byte 0 of %a, except result byte 16 which takes byte 15 of %a.
; Every mask index is below 32, so %b is never selected. The source byte sits in the low 128 bits
; of %a and the destination byte in the high 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
827define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
828; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
829; AVX1:       # %bb.0:
830; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
831; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
832; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
833; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
834; AVX1-NEXT:    retq
835;
836; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
837; AVX2:       # %bb.0:
838; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
839; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
840; AVX2-NEXT:    retq
841;
842; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
843; AVX512VLBW:       # %bb.0:
844; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
845; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
846; AVX512VLBW-NEXT:    retq
847;
848; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
849; AVX512VLVBMI-SLOW:       # %bb.0:
850; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
851; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
852; AVX512VLVBMI-SLOW-NEXT:    retq
853;
854; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
855; AVX512VLVBMI-FAST-ALL:       # %bb.0:
856; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,0,15,0]
857; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
858; AVX512VLVBMI-FAST-ALL-NEXT:    retq
859;
860; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
861; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
862; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
863; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
864; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
865;
866; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
867; XOPAVX1:       # %bb.0:
868; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
869; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
870; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
871; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
872; XOPAVX1-NEXT:    retq
873;
874; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
875; XOPAVX2:       # %bb.0:
876; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
877; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
878; XOPAVX2-NEXT:    retq
879  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
880  ret <32 x i8> %shuffle
881}
882
; Shuffle test. Every result byte is byte 0 of %a, except result byte 15 which takes byte 16 of %a.
; Every mask index is below 32, so %b is never selected. Here the source byte sits in the high
; 128 bits of %a and the destination byte in the low 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
883define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
884; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
885; AVX1:       # %bb.0:
886; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
887; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
888; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
889; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
890; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]
891; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
892; AVX1-NEXT:    retq
893;
894; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
895; AVX2:       # %bb.0:
896; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
897; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
898; AVX2-NEXT:    retq
899;
900; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
901; AVX512VLBW:       # %bb.0:
902; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
903; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
904; AVX512VLBW-NEXT:    retq
905;
906; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
907; AVX512VLVBMI:       # %bb.0:
908; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16]
909; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
910; AVX512VLVBMI-NEXT:    retq
911;
912; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
913; XOPAVX1:       # %bb.0:
914; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
915; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
916; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
917; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[0]
918; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
919; XOPAVX1-NEXT:    retq
920;
921; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
922; XOPAVX2:       # %bb.0:
923; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
924; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
925; XOPAVX2-NEXT:    retq
926  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
927  ret <32 x i8> %shuffle
928}
929
; Shuffle test. Every result byte is byte 0 of %a, except result byte 14 which takes byte 17 of %a.
; Every mask index is below 32, so %b is never selected. Here the source byte sits in the high
; 128 bits of %a and the destination byte in the low 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
930define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
931; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
932; AVX1:       # %bb.0:
933; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
934; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
935; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
936; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
937; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0]
938; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
939; AVX1-NEXT:    retq
940;
941; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
942; AVX2:       # %bb.0:
943; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
944; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
945; AVX2-NEXT:    retq
946;
947; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
948; AVX512VLBW:       # %bb.0:
949; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
950; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
951; AVX512VLBW-NEXT:    retq
952;
953; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
954; AVX512VLVBMI:       # %bb.0:
955; AVX512VLVBMI-NEXT:    vpmovsxbw {{.*#+}} xmm1 = [0,0,0,0,0,0,0,17]
956; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
957; AVX512VLVBMI-NEXT:    retq
958;
959; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
960; XOPAVX1:       # %bb.0:
961; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
962; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
963; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
964; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[1],xmm0[0]
965; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
966; XOPAVX1-NEXT:    retq
967;
968; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
969; XOPAVX2:       # %bb.0:
970; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
971; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
972; XOPAVX2-NEXT:    retq
973  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
974  ret <32 x i8> %shuffle
975}
976
; Shuffle test. Every result byte is byte 0 of %a, except result byte 13 which takes byte 18 of %a.
; Every mask index is below 32, so %b is never selected. Here the source byte sits in the high
; 128 bits of %a and the destination byte in the low 128 bits of the result.
; The per-target assertion lines below are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
977define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
978; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
979; AVX1:       # %bb.0:
980; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
981; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
982; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
983; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
984; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0]
985; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
986; AVX1-NEXT:    retq
987;
988; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
989; AVX2:       # %bb.0:
990; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
991; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
992; AVX2-NEXT:    retq
993;
994; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
995; AVX512VLBW:       # %bb.0:
996; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
997; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
998; AVX512VLBW-NEXT:    retq
999;
1000; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1001; AVX512VLVBMI:       # %bb.0:
1002; AVX512VLVBMI-NEXT:    vpmovsxwd {{.*#+}} xmm1 = [0,0,0,4608]
1003; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1004; AVX512VLVBMI-NEXT:    retq
1005;
1006; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1007; XOPAVX1:       # %bb.0:
1008; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1009; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1010; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1011; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[2],xmm0[0,0]
1012; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1013; XOPAVX1-NEXT:    retq
1014;
1015; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1016; XOPAVX2:       # %bb.0:
1017; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1018; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1019; XOPAVX2-NEXT:    retq
1020  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1021  ret <32 x i8> %shuffle
1022}
1023
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 12, which takes element 19 of %a. All mask indices are below 32, so %b
; is never selected. Element 19 sits in the upper 16 bytes of %a while lane 12
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1024define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1025; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1026; AVX1:       # %bb.0:
1027; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1028; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1029; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1030; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1031; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0]
1032; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1033; AVX1-NEXT:    retq
1034;
1035; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1036; AVX2:       # %bb.0:
1037; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1038; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1039; AVX2-NEXT:    retq
1040;
1041; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1042; AVX512VLBW:       # %bb.0:
1043; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1044; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1045; AVX512VLBW-NEXT:    retq
1046;
1047; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1048; AVX512VLVBMI:       # %bb.0:
1049; AVX512VLVBMI-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [0,0,0,19]
1050; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1051; AVX512VLVBMI-NEXT:    retq
1052;
1053; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1054; XOPAVX1:       # %bb.0:
1055; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1056; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1057; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1058; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],xmm2[3],xmm0[0,0,0]
1059; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1060; XOPAVX1-NEXT:    retq
1061;
1062; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1063; XOPAVX2:       # %bb.0:
1064; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1065; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1066; XOPAVX2-NEXT:    retq
1067  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1068  ret <32 x i8> %shuffle
1069}
1070
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 11, which takes element 20 of %a. All mask indices are below 32, so %b
; is never selected. Element 20 sits in the upper 16 bytes of %a while lane 11
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1071define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1072; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1073; AVX1:       # %bb.0:
1074; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1075; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1076; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1077; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1078; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0]
1079; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1080; AVX1-NEXT:    retq
1081;
1082; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1083; AVX2:       # %bb.0:
1084; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1085; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1086; AVX2-NEXT:    retq
1087;
1088; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1089; AVX512VLBW:       # %bb.0:
1090; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1091; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1092; AVX512VLBW-NEXT:    retq
1093;
1094; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1095; AVX512VLVBMI:       # %bb.0:
1096; AVX512VLVBMI-NEXT:    vpmovsxdq {{.*#+}} xmm1 = [0,335544320]
1097; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1098; AVX512VLVBMI-NEXT:    retq
1099;
1100; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1101; XOPAVX1:       # %bb.0:
1102; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1103; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1104; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1105; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],xmm2[4],xmm0[0,0,0,0]
1106; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1107; XOPAVX1-NEXT:    retq
1108;
1109; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1110; XOPAVX2:       # %bb.0:
1111; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1112; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1113; XOPAVX2-NEXT:    retq
1114  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1115  ret <32 x i8> %shuffle
1116}
1117
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 10, which takes element 21 of %a. All mask indices are below 32, so %b
; is never selected. Element 21 sits in the upper 16 bytes of %a while lane 10
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1118define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1119; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1120; AVX1:       # %bb.0:
1121; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1122; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1123; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1124; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1125; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0]
1126; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1127; AVX1-NEXT:    retq
1128;
1129; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1130; AVX2:       # %bb.0:
1131; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1132; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1133; AVX2-NEXT:    retq
1134;
1135; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1136; AVX512VLBW:       # %bb.0:
1137; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1138; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1139; AVX512VLBW-NEXT:    retq
1140;
1141; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1142; AVX512VLVBMI:       # %bb.0:
1143; AVX512VLVBMI-NEXT:    vpmovsxbw {{.*#+}} xmm1 = [0,0,0,0,0,21,0,0]
1144; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1145; AVX512VLVBMI-NEXT:    retq
1146;
1147; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1148; XOPAVX1:       # %bb.0:
1149; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1150; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1151; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1152; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],xmm2[5],xmm0[0,0,0,0,0]
1153; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1154; XOPAVX1-NEXT:    retq
1155;
1156; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1157; XOPAVX2:       # %bb.0:
1158; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1159; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1160; XOPAVX2-NEXT:    retq
1161  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1162  ret <32 x i8> %shuffle
1163}
1164
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 9, which takes element 22 of %a. All mask indices are below 32, so %b
; is never selected. Element 22 sits in the upper 16 bytes of %a while lane 9
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1165define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1166; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1167; AVX1:       # %bb.0:
1168; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1169; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1170; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1171; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1172; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0]
1173; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1174; AVX1-NEXT:    retq
1175;
1176; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1177; AVX2:       # %bb.0:
1178; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1179; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1180; AVX2-NEXT:    retq
1181;
1182; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1183; AVX512VLBW:       # %bb.0:
1184; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1185; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1186; AVX512VLBW-NEXT:    retq
1187;
1188; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1189; AVX512VLVBMI:       # %bb.0:
1190; AVX512VLVBMI-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [0,5632]
1191; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1192; AVX512VLVBMI-NEXT:    retq
1193;
1194; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1195; XOPAVX1:       # %bb.0:
1196; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1197; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1198; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1199; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0],xmm2[6],xmm0[0,0,0,0,0,0]
1200; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1201; XOPAVX1-NEXT:    retq
1202;
1203; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1204; XOPAVX2:       # %bb.0:
1205; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1206; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1207; XOPAVX2-NEXT:    retq
1208  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1209  ret <32 x i8> %shuffle
1210}
1211
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 8, which takes element 23 of %a. All mask indices are below 32, so %b
; is never selected. Element 23 sits in the upper 16 bytes of %a while lane 8
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1212define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1213; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1214; AVX1:       # %bb.0:
1215; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1216; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1217; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1218; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1219; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
1220; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1221; AVX1-NEXT:    retq
1222;
1223; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1224; AVX2:       # %bb.0:
1225; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1226; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1227; AVX2-NEXT:    retq
1228;
1229; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1230; AVX512VLBW:       # %bb.0:
1231; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1232; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1233; AVX512VLBW-NEXT:    retq
1234;
1235; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1236; AVX512VLVBMI:       # %bb.0:
1237; AVX512VLVBMI-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [0,23]
1238; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1239; AVX512VLVBMI-NEXT:    retq
1240;
1241; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1242; XOPAVX1:       # %bb.0:
1243; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1244; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1245; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1246; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0],xmm2[7],xmm0[0,0,0,0,0,0,0]
1247; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1248; XOPAVX1-NEXT:    retq
1249;
1250; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1251; XOPAVX2:       # %bb.0:
1252; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1253; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1254; XOPAVX2-NEXT:    retq
1255  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1256  ret <32 x i8> %shuffle
1257}
1258
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 7, which takes element 24 of %a. All mask indices are below 32, so %b
; is never selected. Element 24 sits in the upper 16 bytes of %a while lane 7
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1259define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1260; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1261; AVX1:       # %bb.0:
1262; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1263; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1264; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1265; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1266; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,8,8,8,8,8,8,0,8,8,8,8,8,8,8,8]
1267; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1268; AVX1-NEXT:    retq
1269;
1270; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1271; AVX2:       # %bb.0:
1272; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1273; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1274; AVX2-NEXT:    retq
1275;
1276; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1277; AVX512VLBW:       # %bb.0:
1278; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1279; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1280; AVX512VLBW-NEXT:    retq
1281;
1282; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1283; AVX512VLVBMI:       # %bb.0:
1284; AVX512VLVBMI-NEXT:    vmovq {{.*#+}} xmm1 = [0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,0]
1285; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1286; AVX512VLVBMI-NEXT:    retq
1287;
1288; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1289; XOPAVX1:       # %bb.0:
1290; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1291; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1292; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1293; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0],xmm2[8],xmm0[0,0,0,0,0,0,0,0]
1294; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1295; XOPAVX1-NEXT:    retq
1296;
1297; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1298; XOPAVX2:       # %bb.0:
1299; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1300; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1301; XOPAVX2-NEXT:    retq
1302  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1303  ret <32 x i8> %shuffle
1304}
1305
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 6, which takes element 25 of %a. All mask indices are below 32, so %b
; is never selected. Element 25 sits in the upper 16 bytes of %a while lane 6
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1306define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1307; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1308; AVX1:       # %bb.0:
1309; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1310; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1311; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1312; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8]
1313; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,0,7,7,7,7,7,7,7,7,7]
1314; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1315; AVX1-NEXT:    retq
1316;
1317; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1318; AVX2:       # %bb.0:
1319; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1320; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1321; AVX2-NEXT:    retq
1322;
1323; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1324; AVX512VLBW:       # %bb.0:
1325; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1326; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1327; AVX512VLBW-NEXT:    retq
1328;
1329; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1330; AVX512VLVBMI:       # %bb.0:
1331; AVX512VLVBMI-NEXT:    vmovq {{.*#+}} xmm1 = [0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0]
1332; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1333; AVX512VLVBMI-NEXT:    retq
1334;
1335; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1336; XOPAVX1:       # %bb.0:
1337; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1338; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1339; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1340; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0],xmm2[9],xmm0[0,0,0,0,0,0,0,0,0]
1341; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1342; XOPAVX1-NEXT:    retq
1343;
1344; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1345; XOPAVX2:       # %bb.0:
1346; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1347; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1348; XOPAVX2-NEXT:    retq
1349  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1350  ret <32 x i8> %shuffle
1351}
1352
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 5, which takes element 26 of %a. All mask indices are below 32, so %b
; is never selected. Element 26 sits in the upper 16 bytes of %a while lane 5
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1353define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1354; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1355; AVX1:       # %bb.0:
1356; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1357; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1358; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1359; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1360; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,0,6,6,6,6,6,6,6,6,6,6]
1361; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1362; AVX1-NEXT:    retq
1363;
1364; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1365; AVX2:       # %bb.0:
1366; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1367; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1368; AVX2-NEXT:    retq
1369;
1370; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1371; AVX512VLBW:       # %bb.0:
1372; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1373; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1374; AVX512VLBW-NEXT:    retq
1375;
1376; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1377; AVX512VLVBMI:       # %bb.0:
1378; AVX512VLVBMI-NEXT:    vmovq {{.*#+}} xmm1 = [0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0]
1379; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1380; AVX512VLVBMI-NEXT:    retq
1381;
1382; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1383; XOPAVX1:       # %bb.0:
1384; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1385; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1386; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1387; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0],xmm2[10],xmm0[0,0,0,0,0,0,0,0,0,0]
1388; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1389; XOPAVX1-NEXT:    retq
1390;
1391; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1392; XOPAVX2:       # %bb.0:
1393; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1394; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1395; XOPAVX2-NEXT:    retq
1396  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1397  ret <32 x i8> %shuffle
1398}
1399
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 4, which takes element 27 of %a. All mask indices are below 32, so %b
; is never selected. Element 27 sits in the upper 16 bytes of %a while lane 4
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1400define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1401; AVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1402; AVX1:       # %bb.0:
1403; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1404; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1405; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1406; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
1407; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5]
1408; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1409; AVX1-NEXT:    retq
1410;
1411; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1412; AVX2:       # %bb.0:
1413; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1414; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1415; AVX2-NEXT:    retq
1416;
1417; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1418; AVX512VLBW:       # %bb.0:
1419; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1420; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1421; AVX512VLBW-NEXT:    retq
1422;
1423; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1424; AVX512VLVBMI:       # %bb.0:
1425; AVX512VLVBMI-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [0,27,0,0]
1426; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1427; AVX512VLVBMI-NEXT:    retq
1428;
1429; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1430; XOPAVX1:       # %bb.0:
1431; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1432; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1433; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1434; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0],xmm2[11],xmm0[0,0,0,0,0,0,0,0,0,0,0]
1435; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1436; XOPAVX1-NEXT:    retq
1437;
1438; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1439; XOPAVX2:       # %bb.0:
1440; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1441; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1442; XOPAVX2-NEXT:    retq
1443  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1444  ret <32 x i8> %shuffle
1445}
1446
; Single-input byte shuffle - every result lane takes element 0 of %a except
; lane 3, which takes element 28 of %a. All mask indices are below 32, so %b
; is never selected. Element 28 sits in the upper 16 bytes of %a while lane 3
; is in the lower 16 bytes of the result.
; The per-target assertion lines below are autogenerated by
; utils/update_llc_test_checks.py - regenerate them rather than editing by hand.
1447define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1448; AVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1449; AVX1:       # %bb.0:
1450; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1451; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1452; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1453; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1454; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,4]
1455; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1456; AVX1-NEXT:    retq
1457;
1458; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1459; AVX2:       # %bb.0:
1460; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1461; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1462; AVX2-NEXT:    retq
1463;
1464; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1465; AVX512VLBW:       # %bb.0:
1466; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1467; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1468; AVX512VLBW-NEXT:    retq
1469;
1470; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1471; AVX512VLVBMI:       # %bb.0:
1472; AVX512VLVBMI-NEXT:    vmovd {{.*#+}} xmm1 = [0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0]
1473; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1474; AVX512VLVBMI-NEXT:    retq
1475;
1476; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1477; XOPAVX1:       # %bb.0:
1478; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1479; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1480; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1481; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0,0],xmm2[12],xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
1482; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1483; XOPAVX1-NEXT:    retq
1484;
1485; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1486; XOPAVX2:       # %bb.0:
1487; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1488; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1489; XOPAVX2-NEXT:    retq
1490  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1491  ret <32 x i8> %shuffle
1492}
1493
; Single-source byte shuffle: no mask index is 32 or above, so only %a is read
; and %b is unused. Result byte 2 is %a[29] (taken from the upper 128-bit
; half); every other result byte is %a[0].
; The expectations below show three lowering shapes: the AVX-without-AVX2 runs
; build the two 128-bit halves separately and join them with vinsertf128, the
; AVX2-class runs use vpermq followed by one ymm vpshufb, and the VBMI runs use
; a single vpermb with a loaded index vector.
1494define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1495; AVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1496; AVX1:       # %bb.0:
1497; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1498; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1499; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1500; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
1501; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3]
1502; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1503; AVX1-NEXT:    retq
1504;
1505; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1506; AVX2:       # %bb.0:
1507; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1508; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1509; AVX2-NEXT:    retq
1510;
1511; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1512; AVX512VLBW:       # %bb.0:
1513; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1514; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1515; AVX512VLBW-NEXT:    retq
1516;
1517; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1518; AVX512VLVBMI:       # %bb.0:
1519; AVX512VLVBMI-NEXT:    vmovd {{.*#+}} xmm1 = [0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0]
1520; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1521; AVX512VLVBMI-NEXT:    retq
1522;
1523; XOPAVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1524; XOPAVX1:       # %bb.0:
1525; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1526; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1527; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1528; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,0],xmm2[13],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
1529; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1530; XOPAVX1-NEXT:    retq
1531;
1532; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1533; XOPAVX2:       # %bb.0:
1534; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1535; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1536; XOPAVX2-NEXT:    retq
1537  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1538  ret <32 x i8> %shuffle
1539}
1540
; Single-source byte shuffle: no mask index is 32 or above, so only %a is read
; and %b is unused. Result byte 1 is %a[30] (taken from the upper 128-bit
; half); every other result byte is %a[0].
; The expectations below show three lowering shapes: the AVX-without-AVX2 runs
; build the two 128-bit halves separately and join them with vinsertf128, the
; AVX2-class runs use vpermq followed by one ymm vpshufb, and the VBMI runs use
; a single vpermb with a loaded index vector.
1541define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1542; AVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1543; AVX1:       # %bb.0:
1544; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1545; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1546; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1547; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1548; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1549; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1550; AVX1-NEXT:    retq
1551;
1552; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1553; AVX2:       # %bb.0:
1554; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1555; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1556; AVX2-NEXT:    retq
1557;
1558; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1559; AVX512VLBW:       # %bb.0:
1560; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1561; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1562; AVX512VLBW-NEXT:    retq
1563;
1564; AVX512VLVBMI-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1565; AVX512VLVBMI:       # %bb.0:
1566; AVX512VLVBMI-NEXT:    vmovd {{.*#+}} xmm1 = [0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1567; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1568; AVX512VLVBMI-NEXT:    retq
1569;
1570; XOPAVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1571; XOPAVX1:       # %bb.0:
1572; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1573; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1574; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1575; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0],xmm2[14],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1576; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1577; XOPAVX1-NEXT:    retq
1578;
1579; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1580; XOPAVX2:       # %bb.0:
1581; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1582; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1583; XOPAVX2-NEXT:    retq
1584  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1585  ret <32 x i8> %shuffle
1586}
1587
; Single-source byte shuffle: no mask index is 32 or above, so only %a is read
; and %b is unused. Result byte 0 is %a[31] (the last byte of the upper
; 128-bit half); every other result byte is %a[0].
; Compared with the neighbouring single-byte-insert tests, the AVX2-class
; expectations here use a different vpermq order ([3,0,0,3]) and the VBMI
; expectation materialises its index vector with vpmovsxbq rather than vmovd.
1588define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1589; AVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1590; AVX1:       # %bb.0:
1591; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1592; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1593; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1594; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1595; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1596; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1597; AVX1-NEXT:    retq
1598;
1599; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1600; AVX2:       # %bb.0:
1601; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1602; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1603; AVX2-NEXT:    retq
1604;
1605; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1606; AVX512VLBW:       # %bb.0:
1607; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1608; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1609; AVX512VLBW-NEXT:    retq
1610;
1611; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1612; AVX512VLVBMI:       # %bb.0:
1613; AVX512VLVBMI-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [31,0]
1614; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1615; AVX512VLVBMI-NEXT:    retq
1616;
1617; XOPAVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1618; XOPAVX1:       # %bb.0:
1619; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1620; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
1621; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1622; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm2[15],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1623; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1624; XOPAVX1-NEXT:    retq
1625;
1626; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1627; XOPAVX2:       # %bb.0:
1628; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1629; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1630; XOPAVX2-NEXT:    retq
1631  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1632  ret <32 x i8> %shuffle
1633}
1634
; Per-half splat of %a (%b is unused): result bytes 0-15 are all %a[0] and
; result bytes 16-31 are all %a[16], so no byte crosses a 128-bit boundary.
; The AVX-without-AVX2 expectations apply a zeroed vpshufb mask to each xmm
; half and rejoin with vinsertf128; the AVX2-class expectations use one ymm
; vpshufb with a zeroed mask register.
1635define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1636; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1637; AVX1:       # %bb.0:
1638; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1639; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1640; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1641; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1642; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1643; AVX1-NEXT:    retq
1644;
1645; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1646; AVX2OR512VL:       # %bb.0:
1647; AVX2OR512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1648; AVX2OR512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
1649; AVX2OR512VL-NEXT:    retq
1650;
1651; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1652; XOPAVX1:       # %bb.0:
1653; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1654; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1655; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1656; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1657; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1658; XOPAVX1-NEXT:    retq
1659;
1660; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1661; XOPAVX2:       # %bb.0:
1662; XOPAVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1663; XOPAVX2-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
1664; XOPAVX2-NEXT:    retq
1665  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1666  ret <32 x i8> %shuffle
1667}
1668
; Per-half splat of the last byte of each half of %a (%b is unused): result
; bytes 0-15 are all %a[15] and result bytes 16-31 are all %a[31].
; The AVX-without-AVX2 expectations reuse one all-15 xmm mask for both halves
; and rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1669define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1670; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1671; AVX1:       # %bb.0:
1672; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1673; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1674; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1675; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1676; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1677; AVX1-NEXT:    retq
1678;
1679; AVX2OR512VL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1680; AVX2OR512VL:       # %bb.0:
1681; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
1682; AVX2OR512VL-NEXT:    retq
1683;
1684; XOPAVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1685; XOPAVX1:       # %bb.0:
1686; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1687; XOPAVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1688; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1689; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1690; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1691; XOPAVX1-NEXT:    retq
1692;
1693; XOPAVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1694; XOPAVX2:       # %bb.0:
1695; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
1696; XOPAVX2-NEXT:    retq
1697  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1698  ret <32 x i8> %shuffle
1699}
1700
; Splat within each 8-byte group of %a (%b is unused): every group of eight
; result bytes is filled with that group's first source byte (indices 0, 8,
; 16 and 24), so the pattern repeats identically in both 128-bit halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1701define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1702; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1703; AVX1:       # %bb.0:
1704; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1705; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1706; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1707; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1708; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1709; AVX1-NEXT:    retq
1710;
1711; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1712; AVX2OR512VL:       # %bb.0:
1713; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1714; AVX2OR512VL-NEXT:    retq
1715;
1716; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1717; XOPAVX1:       # %bb.0:
1718; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1719; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1720; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1721; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1722; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1723; XOPAVX1-NEXT:    retq
1724;
1725; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1726; XOPAVX2:       # %bb.0:
1727; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1728; XOPAVX2-NEXT:    retq
1729  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1730  ret <32 x i8> %shuffle
1731}
1732
; Splat within each 8-byte group of %a (%b is unused): every group of eight
; result bytes is filled with that group's last source byte (indices 7, 15,
; 23 and 31), so the pattern repeats identically in both 128-bit halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1733define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1734; AVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1735; AVX1:       # %bb.0:
1736; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1737; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
1738; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1739; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1740; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1741; AVX1-NEXT:    retq
1742;
1743; AVX2OR512VL-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1744; AVX2OR512VL:       # %bb.0:
1745; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
1746; AVX2OR512VL-NEXT:    retq
1747;
1748; XOPAVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1749; XOPAVX1:       # %bb.0:
1750; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1751; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
1752; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1753; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1754; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1755; XOPAVX1-NEXT:    retq
1756;
1757; XOPAVX2-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1758; XOPAVX2:       # %bb.0:
1759; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
1760; XOPAVX2-NEXT:    retq
1761  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1762  ret <32 x i8> %shuffle
1763}
1764
; Splat within each 4-byte group of %a (%b is unused): every group of four
; result bytes is filled with that group's first source byte (indices 0, 4,
; 8, ... 28), so the pattern repeats identically in both 128-bit halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1765define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) {
1766; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1767; AVX1:       # %bb.0:
1768; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1769; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1770; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1771; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1772; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1773; AVX1-NEXT:    retq
1774;
1775; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1776; AVX2OR512VL:       # %bb.0:
1777; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
1778; AVX2OR512VL-NEXT:    retq
1779;
1780; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1781; XOPAVX1:       # %bb.0:
1782; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1783; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1784; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1785; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1786; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1787; XOPAVX1-NEXT:    retq
1788;
1789; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1790; XOPAVX2:       # %bb.0:
1791; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
1792; XOPAVX2-NEXT:    retq
1793  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
1794  ret <32 x i8> %shuffle
1795}
1796
; Splat within each 4-byte group of %a (%b is unused): every group of four
; result bytes is filled with that group's last source byte (indices 3, 7,
; 11, ... 31), so the pattern repeats identically in both 128-bit halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1797define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1798; AVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1799; AVX1:       # %bb.0:
1800; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1801; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
1802; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1803; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1804; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1805; AVX1-NEXT:    retq
1806;
1807; AVX2OR512VL-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1808; AVX2OR512VL:       # %bb.0:
1809; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
1810; AVX2OR512VL-NEXT:    retq
1811;
1812; XOPAVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1813; XOPAVX1:       # %bb.0:
1814; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1815; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
1816; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1817; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1818; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1819; XOPAVX1-NEXT:    retq
1820;
1821; XOPAVX2-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1822; XOPAVX2:       # %bb.0:
1823; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
1824; XOPAVX2-NEXT:    retq
1825  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31>
1826  ret <32 x i8> %shuffle
1827}
1828
; Duplicates the even-indexed bytes of %a (%b is unused): result bytes 2k and
; 2k+1 are both %a[2k], so the pattern repeats identically in both 128-bit
; halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1829define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
1830; AVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1831; AVX1:       # %bb.0:
1832; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1833; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1834; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1835; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1836; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1837; AVX1-NEXT:    retq
1838;
1839; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1840; AVX2OR512VL:       # %bb.0:
1841; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
1842; AVX2OR512VL-NEXT:    retq
1843;
1844; XOPAVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1845; XOPAVX1:       # %bb.0:
1846; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1847; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1848; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1849; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1850; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1851; XOPAVX1-NEXT:    retq
1852;
1853; XOPAVX2-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1854; XOPAVX2:       # %bb.0:
1855; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
1856; XOPAVX2-NEXT:    retq
1857  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
1858  ret <32 x i8> %shuffle
1859}
1860
; Duplicates the odd-indexed bytes of %a (%b is unused): result bytes 2k and
; 2k+1 are both %a[2k+1], so the pattern repeats identically in both 128-bit
; halves.
; The AVX-without-AVX2 expectations reuse one xmm mask for both halves and
; rejoin with vinsertf128; the AVX2-class expectations are a single ymm
; vpshufb.
1861define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) {
1862; AVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1863; AVX1:       # %bb.0:
1864; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1865; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
1866; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1867; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1868; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1869; AVX1-NEXT:    retq
1870;
1871; AVX2OR512VL-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1872; AVX2OR512VL:       # %bb.0:
1873; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
1874; AVX2OR512VL-NEXT:    retq
1875;
1876; XOPAVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1877; XOPAVX1:       # %bb.0:
1878; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1879; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
1880; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1881; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
1882; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1883; XOPAVX1-NEXT:    retq
1884;
1885; XOPAVX2-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1886; XOPAVX2:       # %bb.0:
1887; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
1888; XOPAVX2-NEXT:    retq
1889  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
1890  ret <32 x i8> %shuffle
1891}
1892
; Both 128-bit halves of the result hold the same 16-byte pattern, built only
; from the low bytes of %a (%b is unused): every byte is %a[0] except
; positions 14 and 30, which are %a[1]. The upper half of %a is never read.
; Most expectations shuffle the low xmm once and then duplicate it into both
; halves (vinsertf128 on the AVX-without-AVX2 runs, vpermq [0,1,0,1]
; elsewhere). The VBMI run with fast cross-lane shuffles instead expects a
; single vpermb fed by a broadcast 16-byte index pattern, which is why the
; VBMI expectations are split across three separate check prefixes here.
1893define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
1894; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1895; AVX1:       # %bb.0:
1896; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1897; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1898; AVX1-NEXT:    retq
1899;
1900; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1901; AVX2:       # %bb.0:
1902; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1903; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1904; AVX2-NEXT:    retq
1905;
1906; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1907; AVX512VLBW:       # %bb.0:
1908; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1909; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1910; AVX512VLBW-NEXT:    retq
1911;
1912; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1913; AVX512VLVBMI-SLOW:       # %bb.0:
1914; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1915; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1916; AVX512VLVBMI-SLOW-NEXT:    retq
1917;
1918; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1919; AVX512VLVBMI-FAST-ALL:       # %bb.0:
1920; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1921; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
1922; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1923; AVX512VLVBMI-FAST-ALL-NEXT:    retq
1924;
1925; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1926; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
1927; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1928; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1929; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
1930;
1931; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1932; XOPAVX1:       # %bb.0:
1933; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1934; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1935; XOPAVX1-NEXT:    retq
1936;
1937; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1938; XOPAVX2:       # %bb.0:
1939; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1940; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1941; XOPAVX2-NEXT:    retq
1942  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1943  ret <32 x i8> %shuffle
1944}
1945
; Single-input byte shuffle of %a (%b is unused): every result byte is element 0
; of %a except positions 13 and 29, which take element 2. The same 16-entry
; pattern fills both halves of the result and only reads the low 16 bytes of %a,
; so most runs expect one xmm vpshufb followed by a duplicate into the upper
; half (vinsertf128 or vpermq). The VBMI run with fast cross-lane shuffles
; expects a single vpermb driven by a broadcast index vector instead.
1946define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
1947; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1948; AVX1:       # %bb.0:
1949; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1950; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1951; AVX1-NEXT:    retq
1952;
1953; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1954; AVX2:       # %bb.0:
1955; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1956; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1957; AVX2-NEXT:    retq
1958;
1959; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1960; AVX512VLBW:       # %bb.0:
1961; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1962; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1963; AVX512VLBW-NEXT:    retq
1964;
1965; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1966; AVX512VLVBMI-SLOW:       # %bb.0:
1967; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1968; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1969; AVX512VLVBMI-SLOW-NEXT:    retq
1970;
1971; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1972; AVX512VLVBMI-FAST-ALL:       # %bb.0:
1973; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1974; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
1975; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
1976; AVX512VLVBMI-FAST-ALL-NEXT:    retq
1977;
1978; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1979; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
1980; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1981; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1982; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
1983;
1984; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1985; XOPAVX1:       # %bb.0:
1986; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1987; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1988; XOPAVX1-NEXT:    retq
1989;
1990; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1991; XOPAVX2:       # %bb.0:
1992; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1993; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1994; XOPAVX2-NEXT:    retq
1995  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1996  ret <32 x i8> %shuffle
1997}
1998
; Single-input byte shuffle of %a (%b is unused): every result byte is element 0
; of %a except positions 8 and 24, which take element 7. The same 16-entry
; pattern fills both halves of the result and only reads the low 16 bytes of %a,
; so most runs expect one xmm vpshufb followed by a duplicate into the upper
; half (vinsertf128 or vpermq). The VBMI run with fast cross-lane shuffles
; expects a single vpermb driven by a broadcast index vector instead.
1999define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2000; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2001; AVX1:       # %bb.0:
2002; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2003; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2004; AVX1-NEXT:    retq
2005;
2006; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2007; AVX2:       # %bb.0:
2008; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2009; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2010; AVX2-NEXT:    retq
2011;
2012; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2013; AVX512VLBW:       # %bb.0:
2014; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2015; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2016; AVX512VLBW-NEXT:    retq
2017;
2018; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2019; AVX512VLVBMI-SLOW:       # %bb.0:
2020; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2021; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2022; AVX512VLVBMI-SLOW-NEXT:    retq
2023;
2024; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2025; AVX512VLVBMI-FAST-ALL:       # %bb.0:
2026; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2027; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
2028; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
2029; AVX512VLVBMI-FAST-ALL-NEXT:    retq
2030;
2031; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2032; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
2033; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2034; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2035; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
2036;
2037; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2038; XOPAVX1:       # %bb.0:
2039; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2040; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2041; XOPAVX1-NEXT:    retq
2042;
2043; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2044; XOPAVX2:       # %bb.0:
2045; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2046; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2047; XOPAVX2-NEXT:    retq
2048  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2049  ret <32 x i8> %shuffle
2050}
2051
; Single-input byte shuffle of %a (%b is unused): every result byte is element 0
; of %a except positions 7 and 23, which take element 8. The same 16-entry
; pattern fills both halves of the result and only reads the low 16 bytes of %a,
; so most runs expect one xmm vpshufb followed by a duplicate into the upper
; half (vinsertf128 or vpermq). The VBMI run with fast cross-lane shuffles
; expects a single vpermb driven by a broadcast index vector instead.
2052define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2053; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2054; AVX1:       # %bb.0:
2055; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2056; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2057; AVX1-NEXT:    retq
2058;
2059; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2060; AVX2:       # %bb.0:
2061; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2062; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2063; AVX2-NEXT:    retq
2064;
2065; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2066; AVX512VLBW:       # %bb.0:
2067; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2068; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2069; AVX512VLBW-NEXT:    retq
2070;
2071; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2072; AVX512VLVBMI-SLOW:       # %bb.0:
2073; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2074; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2075; AVX512VLVBMI-SLOW-NEXT:    retq
2076;
2077; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2078; AVX512VLVBMI-FAST-ALL:       # %bb.0:
2079; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2080; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
2081; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
2082; AVX512VLVBMI-FAST-ALL-NEXT:    retq
2083;
2084; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2085; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
2086; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2087; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2088; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
2089;
2090; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2091; XOPAVX1:       # %bb.0:
2092; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2093; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2094; XOPAVX1-NEXT:    retq
2095;
2096; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2097; XOPAVX2:       # %bb.0:
2098; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2099; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2100; XOPAVX2-NEXT:    retq
2101  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2102  ret <32 x i8> %shuffle
2103}
2104
; Single-input byte shuffle of %a (%b is unused): every result byte is element 0
; of %a except positions 1 and 17, which take element 14. The same 16-entry
; pattern fills both halves of the result and only reads the low 16 bytes of %a,
; so most runs expect one xmm vpshufb followed by a duplicate into the upper
; half (vinsertf128 or vpermq). The VBMI run with fast cross-lane shuffles
; expects a single vpermb driven by a broadcast index vector instead.
2105define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2106; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2107; AVX1:       # %bb.0:
2108; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2109; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2110; AVX1-NEXT:    retq
2111;
2112; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2113; AVX2:       # %bb.0:
2114; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2115; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2116; AVX2-NEXT:    retq
2117;
2118; AVX512VLBW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2119; AVX512VLBW:       # %bb.0:
2120; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2121; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2122; AVX512VLBW-NEXT:    retq
2123;
2124; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2125; AVX512VLVBMI-SLOW:       # %bb.0:
2126; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2127; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2128; AVX512VLVBMI-SLOW-NEXT:    retq
2129;
2130; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2131; AVX512VLVBMI-FAST-ALL:       # %bb.0:
2132; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2133; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
2134; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
2135; AVX512VLVBMI-FAST-ALL-NEXT:    retq
2136;
2137; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2138; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
2139; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2140; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2141; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
2142;
2143; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2144; XOPAVX1:       # %bb.0:
2145; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2146; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2147; XOPAVX1-NEXT:    retq
2148;
2149; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2150; XOPAVX2:       # %bb.0:
2151; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2152; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2153; XOPAVX2-NEXT:    retq
2154  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2155  ret <32 x i8> %shuffle
2156}
2157
; Single-input byte shuffle of %a (%b is unused): every result byte is element 0
; of %a except positions 0 and 16, which take element 15. The same 16-entry
; pattern fills both halves of the result and only reads the low 16 bytes of %a,
; so most runs expect one xmm vpshufb followed by a duplicate into the upper
; half (vinsertf128 or vpermq). The VBMI run with fast cross-lane shuffles
; expects a single vpermb driven by a broadcast index vector instead.
2158define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2159; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2160; AVX1:       # %bb.0:
2161; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2162; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2163; AVX1-NEXT:    retq
2164;
2165; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2166; AVX2:       # %bb.0:
2167; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2168; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2169; AVX2-NEXT:    retq
2170;
2171; AVX512VLBW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2172; AVX512VLBW:       # %bb.0:
2173; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2174; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2175; AVX512VLBW-NEXT:    retq
2176;
2177; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2178; AVX512VLVBMI-SLOW:       # %bb.0:
2179; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2180; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2181; AVX512VLVBMI-SLOW-NEXT:    retq
2182;
2183; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2184; AVX512VLVBMI-FAST-ALL:       # %bb.0:
2185; AVX512VLVBMI-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2186; AVX512VLVBMI-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
2187; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
2188; AVX512VLVBMI-FAST-ALL-NEXT:    retq
2189;
2190; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2191; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
2192; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2193; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2194; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
2195;
2196; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2197; XOPAVX1:       # %bb.0:
2198; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2199; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2200; XOPAVX1-NEXT:    retq
2201;
2202; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2203; XOPAVX2:       # %bb.0:
2204; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2205; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2206; XOPAVX2-NEXT:    retq
2207  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2208  ret <32 x i8> %shuffle
2209}
2210
; Two-input byte blend: even result positions keep the byte of %a at that
; position, odd result positions take the byte of %b at that position (mask
; indices 32 and above select from %b). No byte changes position, so every run
; expects a select rather than a permute. That is and/andn/or with a
; 255,0,... constant on plain AVX, vpblendvb with the same constant on AVX2,
; a k-masked byte move with mask 0xAAAAAAAA on AVX-512, and vpcmov with a
; constant-pool selector on XOP without AVX2.
2211define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
2212; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2213; AVX1:       # %bb.0:
2214; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2215; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
2216; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
2217; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
2218; AVX1-NEXT:    retq
2219;
2220; AVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2221; AVX2:       # %bb.0:
2222; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2223; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2224; AVX2-NEXT:    retq
2225;
2226; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2227; AVX512VL:       # %bb.0:
2228; AVX512VL-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
2229; AVX512VL-NEXT:    kmovd %eax, %k1
2230; AVX512VL-NEXT:    vmovdqu8 %ymm1, %ymm0 {%k1}
2231; AVX512VL-NEXT:    retq
2232;
2233; XOPAVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2234; XOPAVX1:       # %bb.0:
2235; XOPAVX1-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
2236; XOPAVX1-NEXT:    retq
2237;
2238; XOPAVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2239; XOPAVX2:       # %bb.0:
2240; XOPAVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2241; XOPAVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2242; XOPAVX2-NEXT:    retq
2243  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
2244  ret <32 x i8> %shuffle
2245}
2246
; Two-input byte blend with the operand roles reversed relative to an
; even-from-%a blend: even result positions take the byte of %b at that
; position (mask indices 32 and above), odd result positions keep the byte of
; %a. No byte changes position, so every run expects a select. The 255,0,...
; constant and the 0xAAAAAAAA k-mask are unchanged; only the register operands
; of the and/andn, vpblendvb, vpblendmb and vpcmov instructions are swapped.
2247define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
2248; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2249; AVX1:       # %bb.0:
2250; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2251; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
2252; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
2253; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
2254; AVX1-NEXT:    retq
2255;
2256; AVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2257; AVX2:       # %bb.0:
2258; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2259; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
2260; AVX2-NEXT:    retq
2261;
2262; AVX512VL-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2263; AVX512VL:       # %bb.0:
2264; AVX512VL-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
2265; AVX512VL-NEXT:    kmovd %eax, %k1
2266; AVX512VL-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
2267; AVX512VL-NEXT:    retq
2268;
2269; XOPAVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2270; XOPAVX1:       # %bb.0:
2271; XOPAVX1-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
2272; XOPAVX1-NEXT:    retq
2273;
2274; XOPAVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2275; XOPAVX2:       # %bb.0:
2276; XOPAVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2277; XOPAVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
2278; XOPAVX2-NEXT:    retq
2279  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
2280  ret <32 x i8> %shuffle
2281}
2282
2283; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780
2284
; Blend of a vector loaded from %px (32-byte aligned) with the register
; argument %y, with the loaded value as the first shuffle operand. In every
; group of eight bytes, positions 0, 1, 3 and 7 come from %y (mask indices 32
; and above) and positions 2, 4, 5 and 6 come from the loaded %x.
; Every run expects the load to be folded into the blending instruction as a
; (%rdi) memory operand; no separate vector load appears in the checks.
2285define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) {
2286; AVX1-LABEL: load_fold_pblendvb:
2287; AVX1:       # %bb.0:
2288; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2289; AVX1-NEXT:    vandnps (%rdi), %ymm1, %ymm2
2290; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
2291; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
2292; AVX1-NEXT:    retq
2293;
2294; AVX2-LABEL: load_fold_pblendvb:
2295; AVX2:       # %bb.0:
2296; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
2297; AVX2-NEXT:    vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2298; AVX2-NEXT:    retq
2299;
2300; AVX512VL-LABEL: load_fold_pblendvb:
2301; AVX512VL:       # %bb.0:
2302; AVX512VL-NEXT:    movl $1953789044, %eax # imm = 0x74747474
2303; AVX512VL-NEXT:    kmovd %eax, %k1
2304; AVX512VL-NEXT:    vmovdqu8 (%rdi), %ymm0 {%k1}
2305; AVX512VL-NEXT:    retq
2306;
2307; XOPAVX1-LABEL: load_fold_pblendvb:
2308; XOPAVX1:       # %bb.0:
2309; XOPAVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2310; XOPAVX1-NEXT:    vpcmov %ymm1, (%rdi), %ymm0, %ymm0
2311; XOPAVX1-NEXT:    retq
2312;
2313; XOPAVX2-LABEL: load_fold_pblendvb:
2314; XOPAVX2:       # %bb.0:
2315; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
2316; XOPAVX2-NEXT:    vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2317; XOPAVX2-NEXT:    retq
2318  %x = load <32 x i8>, ptr %px, align 32
2319  %select = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
2320  ret <32 x i8> %select
2321}
2322
; Blend of the register argument %y with a vector loaded from %px (32-byte
; aligned), with the loaded value as the SECOND shuffle operand. In every group
; of eight bytes, positions 0, 1, 3 and 7 come from the loaded %x (mask indices
; 32 and above) and positions 2, 4, 5 and 6 come from %y.
; The plain-AVX run (vandps) and the AVX2 runs (vpblendvb with the inverted
; byte mask) expect the load folded as a (%rdi) operand. The AVX-512 run and the
; XOP-without-AVX2 run currently expect a separate vmovdqa load first.
2323define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) {
2324; AVX1-LABEL: load_fold_pblendvb_commute:
2325; AVX1:       # %bb.0:
2326; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2327; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
2328; AVX1-NEXT:    vandps (%rdi), %ymm1, %ymm1
2329; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
2330; AVX1-NEXT:    retq
2331;
2332; AVX2-LABEL: load_fold_pblendvb_commute:
2333; AVX2:       # %bb.0:
2334; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2335; AVX2-NEXT:    vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2336; AVX2-NEXT:    retq
2337;
2338; AVX512VL-LABEL: load_fold_pblendvb_commute:
2339; AVX512VL:       # %bb.0:
2340; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1
2341; AVX512VL-NEXT:    movl $1953789044, %eax # imm = 0x74747474
2342; AVX512VL-NEXT:    kmovd %eax, %k1
2343; AVX512VL-NEXT:    vmovdqu8 %ymm0, %ymm1 {%k1}
2344; AVX512VL-NEXT:    vmovdqa %ymm1, %ymm0
2345; AVX512VL-NEXT:    retq
2346;
2347; XOPAVX1-LABEL: load_fold_pblendvb_commute:
2348; XOPAVX1:       # %bb.0:
2349; XOPAVX1-NEXT:    vmovdqa (%rdi), %ymm1
2350; XOPAVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2351; XOPAVX1-NEXT:    vpcmov %ymm2, %ymm0, %ymm1, %ymm0
2352; XOPAVX1-NEXT:    retq
2353;
2354; XOPAVX2-LABEL: load_fold_pblendvb_commute:
2355; XOPAVX2:       # %bb.0:
2356; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2357; XOPAVX2-NEXT:    vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2358; XOPAVX2-NEXT:    retq
2359  %x = load <32 x i8>, ptr %px, align 32
2360  %select = shufflevector <32 x i8> %y, <32 x i8> %x, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
2361  ret <32 x i8> %select
2362}
2363
; Shuffle of %a against an all-zero second operand: even result positions take
; a zero byte (mask indices 32 and above select from zeroinitializer) and odd
; positions keep the byte of %a at that position. "zz" in the name marks the
; zeroed positions. Every run expects a single bitwise AND with a
; constant-pool mask (vandps, or vpandd with a {1to8} broadcast on AVX-512).
2364define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) {
2365; AVX1-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2366; AVX1:       # %bb.0:
2367; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2368; AVX1-NEXT:    retq
2369;
2370; AVX2-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2371; AVX2:       # %bb.0:
2372; AVX2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2373; AVX2-NEXT:    retq
2374;
2375; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2376; AVX512VL:       # %bb.0:
2377; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
2378; AVX512VL-NEXT:    retq
2379;
2380; XOP-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2381; XOP:       # %bb.0:
2382; XOP-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2383; XOP-NEXT:    retq
2384  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
2385  ret <32 x i8> %shuffle
2386}
2387
; Shuffle of %a against an all-zero second operand that only disturbs the first
; six bytes: result bytes 0..5 are a[1], zero, a[2], zero, a[4], poison, and
; bytes 6..31 are the identity. "zz" in the name marks a zeroed byte and "uu"
; the poison (don't-care) byte. Targets with 256-bit integer shuffles expect a
; single ymm vpshufb; the AVX-only runs expect an xmm vpshufb of the low half
; blended back over the untouched upper half.
; NOTE(review): the "u6" in the function name sits at position 16, where the
; mask selects element 16 (not poison); it looks like a typo for "16". The name
; is left unchanged here -- confirm before renaming.
2388define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31(<32 x i8> %a) {
2389; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2390; AVX1:       # %bb.0:
2391; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
2392; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2393; AVX1-NEXT:    retq
2394;
2395; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2396; AVX2OR512VL:       # %bb.0:
2397; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
2398; AVX2OR512VL-NEXT:    retq
2399;
2400; XOPAVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2401; XOPAVX1:       # %bb.0:
2402; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
2403; XOPAVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2404; XOPAVX1-NEXT:    retq
2405;
2406; XOPAVX2-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2407; XOPAVX2:       # %bb.0:
2408; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
2409; XOPAVX2-NEXT:    retq
2410  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 2, i32 32, i32 4, i32 poison, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2411  ret <32 x i8> %shuffle
2412}
2413
; Two-input shuffle that alternates element 0 of %a with element 0 of %b (mask
; index 32) across all 32 result bytes, i.e. the byte pair {a[0], b[0]}
; repeated sixteen times. Targets with AVX2 expect an xmm vpunpcklbw to form
; the pair followed by vpbroadcastw. Plain AVX expects vpunpcklbw, then
; vpshuflw + vpshufd to splat word 0, then vinsertf128 to fill the upper half.
; XOP without AVX2 expects one vpperm that builds the 16-byte pattern directly,
; followed by vinsertf128.
2414define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
2415; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2416; AVX1:       # %bb.0:
2417; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2418; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2419; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2420; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2421; AVX1-NEXT:    retq
2422;
2423; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2424; AVX2OR512VL:       # %bb.0:
2425; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2426; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %ymm0
2427; AVX2OR512VL-NEXT:    retq
2428;
2429; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2430; XOPAVX1:       # %bb.0:
2431; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0]
2432; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2433; XOPAVX1-NEXT:    retq
2434;
2435; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2436; XOPAVX2:       # %bb.0:
2437; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2438; XOPAVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
2439; XOPAVX2-NEXT:    retq
2440  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32>
2441  ret <32 x i8> %shuffle
2442}
2443
; Two-input shuffle. Within each 128-bit half the result alternates byte 0 of
; that half of %a with byte 0 of the same half of %b: the mask is the pair
; (0, 32) repeated for elements 0-15 and the pair (16, 48) for elements 16-31.
2444define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) {
2445; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2446; AVX1:       # %bb.0:
2447; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2448; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
2449; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
2450; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2451; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2452; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2453; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
2454; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2455; AVX1-NEXT:    retq
2456;
2457; AVX2-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2458; AVX2-SLOW:       # %bb.0:
2459; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2460; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2461; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2462; AVX2-SLOW-NEXT:    retq
2463;
2464; AVX2-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2465; AVX2-FAST:       # %bb.0:
2466; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2467; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
2468; AVX2-FAST-NEXT:    retq
2469;
2470; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2471; AVX512VLBW-SLOW:       # %bb.0:
2472; AVX512VLBW-SLOW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2473; AVX512VLBW-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2474; AVX512VLBW-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2475; AVX512VLBW-SLOW-NEXT:    retq
2476;
2477; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2478; AVX512VLBW-FAST:       # %bb.0:
2479; AVX512VLBW-FAST-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2480; AVX512VLBW-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
2481; AVX512VLBW-FAST-NEXT:    retq
2482;
2483; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2484; AVX512VLVBMI:       # %bb.0:
2485; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,32,0,32,0,32,0,32,0,32,0,32,0,32,0,32,16,48,16,48,16,48,16,48,16,48,16,48,16,48,16,48]
2486; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
2487; AVX512VLVBMI-NEXT:    retq
2488;
2489; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2490; XOPAVX1:       # %bb.0:
2491; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2492; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2493; XOPAVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16]
2494; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm3, %xmm2
2495; XOPAVX1-NEXT:    vpperm %xmm4, %xmm1, %xmm0, %xmm0
2496; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2497; XOPAVX1-NEXT:    retq
2498;
2499; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2500; XOPAVX2:       # %bb.0:
2501; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2502; XOPAVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2503; XOPAVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2504; XOPAVX2-NEXT:    retq
2505  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
2506  ret <32 x i8> %shuffle
2507}
2508
; Two-input shuffle. In each 128-bit half, the low 8 result bytes are a splat
; of byte 0 of that half of %b (mask value 32 or 48) and the high 8 result
; bytes are taken unchanged from the same positions of %a (8-15 and 24-31).
2509define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31(<32 x i8> %a, <32 x i8> %b) {
2510; AVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2511; AVX1:       # %bb.0:
2512; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2513; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2514; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2515; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
2516; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2517; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2518; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2519; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2520; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2521; AVX1-NEXT:    retq
2522;
2523; AVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2524; AVX2:       # %bb.0:
2525; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2526; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2527; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2528; AVX2-NEXT:    retq
2529;
2530; AVX512VLBW-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2531; AVX512VLBW:       # %bb.0:
2532; AVX512VLBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2533; AVX512VLBW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2534; AVX512VLBW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2535; AVX512VLBW-NEXT:    retq
2536;
2537; AVX512VLVBMI-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2538; AVX512VLVBMI:       # %bb.0:
2539; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,40,41,42,43,44,45,46,47,16,16,16,16,16,16,16,16,56,57,58,59,60,61,62,63]
2540; AVX512VLVBMI-NEXT:    vpermi2b %ymm0, %ymm1, %ymm2
2541; AVX512VLVBMI-NEXT:    vmovdqa %ymm2, %ymm0
2542; AVX512VLVBMI-NEXT:    retq
2543;
2544; XOPAVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2545; XOPAVX1:       # %bb.0:
2546; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2547; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2548; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,24,25,26,27,28,29,30,31]
2549; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm3, %xmm2
2550; XOPAVX1-NEXT:    vpperm %xmm4, %xmm0, %xmm1, %xmm0
2551; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2552; XOPAVX1-NEXT:    retq
2553;
2554; XOPAVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2555; XOPAVX2:       # %bb.0:
2556; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2557; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2558; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2559; XOPAVX2-NEXT:    retq
2560  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2561  ret <32 x i8> %shuffle
2562}
2563
; Two-input shuffle. In each 128-bit half, the low 8 result bytes are the low
; 8 bytes of that half of %b in reverse order, and the high 8 result bytes are
; the high 8 bytes of that half of %a in reverse order.
2564define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24(<32 x i8> %a, <32 x i8> %b) {
2565; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2566; AVX1:       # %bb.0:
2567; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2568; AVX1-NEXT:    vmovq {{.*#+}} xmm3 = [15,14,13,12,11,10,9,8,0,0,0,0,0,0,0,0]
2569; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
2570; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
2571; AVX1-NEXT:    vmovq {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,0,0,0,0,0,0,0,0]
2572; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
2573; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
2574; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
2575; AVX1-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
2576; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2577; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2578; AVX1-NEXT:    retq
2579;
2580; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2581; AVX2:       # %bb.0:
2582; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2583; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2584; AVX2-NEXT:    retq
2585;
2586; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2587; AVX512VLBW:       # %bb.0:
2588; AVX512VLBW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2589; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2590; AVX512VLBW-NEXT:    retq
2591;
2592; AVX512VLVBMI-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2593; AVX512VLVBMI:       # %bb.0:
2594; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,47,46,45,44,43,42,41,40,23,22,21,20,19,18,17,16,63,62,61,60,59,58,57,56]
2595; AVX512VLVBMI-NEXT:    vpermi2b %ymm0, %ymm1, %ymm2
2596; AVX512VLVBMI-NEXT:    vmovdqa %ymm2, %ymm0
2597; AVX512VLVBMI-NEXT:    retq
2598;
2599; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2600; XOPAVX1:       # %bb.0:
2601; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2602; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2603; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24]
2604; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm3, %xmm2
2605; XOPAVX1-NEXT:    vpperm %xmm4, %xmm0, %xmm1, %xmm0
2606; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2607; XOPAVX1-NEXT:    retq
2608;
2609; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2610; XOPAVX2:       # %bb.0:
2611; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2612; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2613; XOPAVX2-NEXT:    retq
2614  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
2615  ret <32 x i8> %shuffle
2616}
2617
; Two-input shuffle. In each 128-bit half, the low 8 result bytes are the low
; 8 bytes of that half of %b in reverse order, and the high 8 result bytes are
; the low 8 bytes of that half of %a in reverse order.
2618define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16(<32 x i8> %a, <32 x i8> %b) {
2619; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2620; AVX1:       # %bb.0:
2621; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2622; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2623; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2624; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [14,12,10,8,6,4,2,0,15,13,11,9,7,5,3,1]
2625; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
2626; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2627; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
2628; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2629; AVX1-NEXT:    retq
2630;
2631; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2632; AVX2:       # %bb.0:
2633; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
2634; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
2635; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2636; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2637; AVX2-NEXT:    retq
2638;
2639; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2640; AVX512VLBW:       # %bb.0:
2641; AVX512VLBW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
2642; AVX512VLBW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
2643; AVX512VLBW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2644; AVX512VLBW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2645; AVX512VLBW-NEXT:    retq
2646;
2647; AVX512VLVBMI-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2648; AVX512VLVBMI:       # %bb.0:
2649; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,39,38,37,36,35,34,33,32,23,22,21,20,19,18,17,16,55,54,53,52,51,50,49,48]
2650; AVX512VLVBMI-NEXT:    vpermi2b %ymm0, %ymm1, %ymm2
2651; AVX512VLVBMI-NEXT:    vmovdqa %ymm2, %ymm0
2652; AVX512VLVBMI-NEXT:    retq
2653;
2654; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2655; XOPAVX1:       # %bb.0:
2656; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2657; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2658; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16]
2659; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm3, %xmm2
2660; XOPAVX1-NEXT:    vpperm %xmm4, %xmm0, %xmm1, %xmm0
2661; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2662; XOPAVX1-NEXT:    retq
2663;
2664; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2665; XOPAVX2:       # %bb.0:
2666; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
2667; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
2668; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
2669; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2670; XOPAVX2-NEXT:    retq
2671  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
2672  ret <32 x i8> %shuffle
2673}
2674
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 14 of the half, which takes byte 1 of that half.
2675define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) {
2676; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2677; AVX1:       # %bb.0:
2678; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2679; AVX1-NEXT:    vpmovsxbw {{.*#+}} xmm2 = [0,0,0,0,0,0,0,1]
2680; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2681; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2682; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2683; AVX1-NEXT:    retq
2684;
2685; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2686; AVX2OR512VL:       # %bb.0:
2687; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
2688; AVX2OR512VL-NEXT:    retq
2689;
2690; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2691; XOPAVX1:       # %bb.0:
2692; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2693; XOPAVX1-NEXT:    vpmovsxbw {{.*#+}} xmm2 = [0,0,0,0,0,0,0,1]
2694; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2695; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2696; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2697; XOPAVX1-NEXT:    retq
2698;
2699; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2700; XOPAVX2:       # %bb.0:
2701; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
2702; XOPAVX2-NEXT:    retq
2703  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16>
2704  ret <32 x i8> %shuffle
2705}
2706
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 13 of the half, which takes byte 2 of that half.
2707define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> %a, <32 x i8> %b) {
2708; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2709; AVX1:       # %bb.0:
2710; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2711; AVX1-NEXT:    vpmovsxwd {{.*#+}} xmm2 = [0,0,0,512]
2712; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2713; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2714; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2715; AVX1-NEXT:    retq
2716;
2717; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2718; AVX2OR512VL:       # %bb.0:
2719; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
2720; AVX2OR512VL-NEXT:    retq
2721;
2722; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2723; XOPAVX1:       # %bb.0:
2724; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2725; XOPAVX1-NEXT:    vpmovsxwd {{.*#+}} xmm2 = [0,0,0,512]
2726; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2727; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2728; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2729; XOPAVX1-NEXT:    retq
2730;
2731; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2732; XOPAVX2:       # %bb.0:
2733; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
2734; XOPAVX2-NEXT:    retq
2735  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16>
2736  ret <32 x i8> %shuffle
2737}
2738
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 8 of the half, which takes byte 7 of that half.
2739define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2740; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2741; AVX1:       # %bb.0:
2742; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2743; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [0,7]
2744; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2745; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2746; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2747; AVX1-NEXT:    retq
2748;
2749; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2750; AVX2OR512VL:       # %bb.0:
2751; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
2752; AVX2OR512VL-NEXT:    retq
2753;
2754; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2755; XOPAVX1:       # %bb.0:
2756; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2757; XOPAVX1-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [0,7]
2758; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2759; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2760; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2761; XOPAVX1-NEXT:    retq
2762;
2763; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2764; XOPAVX2:       # %bb.0:
2765; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
2766; XOPAVX2-NEXT:    retq
2767  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2768  ret <32 x i8> %shuffle
2769}
2770
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 7 of the half, which takes byte 8 of that half.
2771define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2772; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2773; AVX1:       # %bb.0:
2774; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2775; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2776; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2777; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2778; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2779; AVX1-NEXT:    retq
2780;
2781; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2782; AVX2OR512VL:       # %bb.0:
2783; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
2784; AVX2OR512VL-NEXT:    retq
2785;
2786; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2787; XOPAVX1:       # %bb.0:
2788; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2789; XOPAVX1-NEXT:    vmovq {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2790; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2791; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2792; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2793; XOPAVX1-NEXT:    retq
2794;
2795; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2796; XOPAVX2:       # %bb.0:
2797; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
2798; XOPAVX2-NEXT:    retq
2799  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2800  ret <32 x i8> %shuffle
2801}
2802
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 1 of the half, which takes byte 14 of that half.
2803define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2804; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2805; AVX1:       # %bb.0:
2806; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2807; AVX1-NEXT:    vmovd {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2808; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2809; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2810; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2811; AVX1-NEXT:    retq
2812;
2813; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2814; AVX2OR512VL:       # %bb.0:
2815; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2816; AVX2OR512VL-NEXT:    retq
2817;
2818; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2819; XOPAVX1:       # %bb.0:
2820; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2821; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2822; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2823; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2824; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2825; XOPAVX1-NEXT:    retq
2826;
2827; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2828; XOPAVX2:       # %bb.0:
2829; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2830; XOPAVX2-NEXT:    retq
2831  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2832  ret <32 x i8> %shuffle
2833}
2834
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Each 128-bit half of the result is filled with byte 0 of that half of %a,
; except element 0 of the half, which takes byte 15 of that half.
2835define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2836; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2837; AVX1:       # %bb.0:
2838; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2839; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [15,0]
2840; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2841; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2842; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2843; AVX1-NEXT:    retq
2844;
2845; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2846; AVX2OR512VL:       # %bb.0:
2847; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2848; AVX2OR512VL-NEXT:    retq
2849;
2850; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2851; XOPAVX1:       # %bb.0:
2852; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
2853; XOPAVX1-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [15,0]
2854; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
2855; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
2856; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2857; XOPAVX1-NEXT:    retq
2858;
2859; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2860; XOPAVX2:       # %bb.0:
2861; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2862; XOPAVX2-NEXT:    retq
2863  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2864  ret <32 x i8> %shuffle
2865}
2866
; Two-input shuffle. In each 128-bit half, the low 8 bytes of that half of %a
; are interleaved byte-by-byte with the low 8 bytes of the same half of %b
; (mask pairs 0/32, 1/33, ... 7/39 and 16/48, 17/49, ... 23/55).
2867define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
2868; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2869; AVX1:       # %bb.0:
2870; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2871; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2872; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2873; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2874; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2875; AVX1-NEXT:    retq
2876;
2877; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2878; AVX2OR512VL:       # %bb.0:
2879; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2880; AVX2OR512VL-NEXT:    retq
2881;
2882; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2883; XOPAVX1:       # %bb.0:
2884; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2885; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2886; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2887; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2888; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2889; XOPAVX1-NEXT:    retq
2890;
2891; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2892; XOPAVX2:       # %bb.0:
2893; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2894; XOPAVX2-NEXT:    retq
2895  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
2896  ret <32 x i8> %shuffle
2897}
2898
; Interleave bytes 8-15 of %a and %b into result bytes 0-15, and bytes 24-31
; of %a and %b into result bytes 16-31 (mask indices 32-63 select from %b).
; The checks expect one 256-bit vpunpckhbw under the AVX2OR512VL and XOPAVX2
; prefixes. Under AVX1 and XOPAVX1 they expect two 128-bit vpunpckhbw, one on
; the vextractf128'd upper halves and one on the lower halves, rejoined with
; vinsertf128.
2899define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
2900; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2901; AVX1:       # %bb.0:
2902; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2903; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2904; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2905; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2906; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2907; AVX1-NEXT:    retq
2908;
2909; AVX2OR512VL-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2910; AVX2OR512VL:       # %bb.0:
2911; AVX2OR512VL-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
2912; AVX2OR512VL-NEXT:    retq
2913;
2914; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2915; XOPAVX1:       # %bb.0:
2916; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2917; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2918; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2919; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2920; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2921; XOPAVX1-NEXT:    retq
2922;
2923; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2924; XOPAVX2:       # %bb.0:
2925; XOPAVX2-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
2926; XOPAVX2-NEXT:    retq
2927  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
2928  ret <32 x i8> %shuffle
2929}
2930
; Mixed interleave. Result bytes 0-15 interleave bytes 0-7 of %a and %b, and
; result bytes 16-31 interleave bytes 24-31 of %a and %b (mask indices 32-63
; select from %b).
; Expected lowering per the checks below.
;  - AVX1 and XOPAVX1 use vpunpckhbw on the extracted upper halves and
;    vpunpcklbw on the lower halves, joined by vinsertf128.
;  - AVX2, AVX512VLBW and XOPAVX2 first vpermq both inputs with [0,1,3,3] and
;    then use a single 256-bit vpunpcklbw.
;  - AVX512VLVBMI loads the byte index vector into ymm2 and uses vpermt2b.
2931define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
2932; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2933; AVX1:       # %bb.0:
2934; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2935; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2936; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2937; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2938; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2939; AVX1-NEXT:    retq
2940;
2941; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2942; AVX2:       # %bb.0:
2943; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2944; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2945; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2946; AVX2-NEXT:    retq
2947;
2948; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2949; AVX512VLBW:       # %bb.0:
2950; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2951; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2952; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2953; AVX512VLBW-NEXT:    retq
2954;
2955; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2956; AVX512VLVBMI:       # %bb.0:
2957; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,24,56,25,57,26,58,27,59,28,60,29,61,30,62,31,63]
2958; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
2959; AVX512VLVBMI-NEXT:    retq
2960;
2961; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2962; XOPAVX1:       # %bb.0:
2963; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2964; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2965; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2966; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2967; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2968; XOPAVX1-NEXT:    retq
2969;
2970; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2971; XOPAVX2:       # %bb.0:
2972; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2973; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2974; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2975; XOPAVX2-NEXT:    retq
2976  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
2977  ret <32 x i8> %shuffle
2978}
2979
; Mixed interleave. Result bytes 0-15 interleave bytes 8-15 of %a and %b, and
; result bytes 16-31 interleave bytes 16-23 of %a and %b (mask indices 32-63
; select from %b).
; Expected lowering per the checks below.
;  - AVX1 and XOPAVX1 use vpunpcklbw on the extracted upper halves and
;    vpunpckhbw on the lower halves, joined by vinsertf128.
;  - AVX2, AVX512VLBW and XOPAVX2 first vpermq both inputs with [1,1,2,3] and
;    then use a single 256-bit vpunpcklbw.
;  - AVX512VLVBMI loads the byte index vector into ymm2 and uses vpermt2b.
2980define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
2981; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2982; AVX1:       # %bb.0:
2983; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2984; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
2985; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2986; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2987; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2988; AVX1-NEXT:    retq
2989;
2990; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2991; AVX2:       # %bb.0:
2992; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2993; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2994; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2995; AVX2-NEXT:    retq
2996;
2997; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2998; AVX512VLBW:       # %bb.0:
2999; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
3000; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
3001; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
3002; AVX512VLBW-NEXT:    retq
3003;
3004; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
3005; AVX512VLVBMI:       # %bb.0:
3006; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47,16,48,17,49,18,50,19,51,20,52,21,53,22,54,23,55]
3007; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3008; AVX512VLVBMI-NEXT:    retq
3009;
3010; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
3011; XOPAVX1:       # %bb.0:
3012; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
3013; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
3014; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
3015; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3016; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
3017; XOPAVX1-NEXT:    retq
3018;
3019; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
3020; XOPAVX2:       # %bb.0:
3021; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
3022; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
3023; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
3024; XOPAVX2-NEXT:    retq
3025  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
3026  ret <32 x i8> %shuffle
3027}
3028
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 14, which takes byte 1.
; Result bytes 16-31 are byte 16 except result byte 17, which takes byte 17.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3029define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3030; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3031; AVX1:       # %bb.0:
3032; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
3033; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3034; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3035; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3036; AVX1-NEXT:    retq
3037;
3038; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3039; AVX2OR512VL:       # %bb.0:
3040; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
3041; AVX2OR512VL-NEXT:    retq
3042;
3043; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3044; XOPAVX1:       # %bb.0:
3045; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
3046; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3047; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3048; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3049; XOPAVX1-NEXT:    retq
3050;
3051; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3052; XOPAVX2:       # %bb.0:
3053; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
3054; XOPAVX2-NEXT:    retq
3055  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3056  ret <32 x i8> %shuffle
3057}
3058
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 13, which takes byte 2.
; Result bytes 16-31 are byte 16 except result byte 18, which takes byte 18.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3059define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3060; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3061; AVX1:       # %bb.0:
3062; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
3063; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3064; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
3065; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3066; AVX1-NEXT:    retq
3067;
3068; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3069; AVX2OR512VL:       # %bb.0:
3070; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
3071; AVX2OR512VL-NEXT:    retq
3072;
3073; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3074; XOPAVX1:       # %bb.0:
3075; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
3076; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3077; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
3078; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3079; XOPAVX1-NEXT:    retq
3080;
3081; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3082; XOPAVX2:       # %bb.0:
3083; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
3084; XOPAVX2-NEXT:    retq
3085  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3086  ret <32 x i8> %shuffle
3087}
3088
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 8, which takes byte 7.
; Result bytes 16-31 are byte 16 except result byte 23, which takes byte 23.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3089define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3090; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3091; AVX1:       # %bb.0:
3092; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
3093; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3094; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
3095; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3096; AVX1-NEXT:    retq
3097;
3098; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3099; AVX2OR512VL:       # %bb.0:
3100; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
3101; AVX2OR512VL-NEXT:    retq
3102;
3103; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3104; XOPAVX1:       # %bb.0:
3105; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
3106; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3107; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
3108; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3109; XOPAVX1-NEXT:    retq
3110;
3111; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3112; XOPAVX2:       # %bb.0:
3113; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
3114; XOPAVX2-NEXT:    retq
3115  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3116  ret <32 x i8> %shuffle
3117}
3118
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 7, which takes byte 8.
; Result bytes 16-31 are byte 16 except result byte 24, which takes byte 24.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3119define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3120; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3121; AVX1:       # %bb.0:
3122; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
3123; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3124; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
3125; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3126; AVX1-NEXT:    retq
3127;
3128; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3129; AVX2OR512VL:       # %bb.0:
3130; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
3131; AVX2OR512VL-NEXT:    retq
3132;
3133; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3134; XOPAVX1:       # %bb.0:
3135; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
3136; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3137; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
3138; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3139; XOPAVX1-NEXT:    retq
3140;
3141; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3142; XOPAVX2:       # %bb.0:
3143; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
3144; XOPAVX2-NEXT:    retq
3145  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3146  ret <32 x i8> %shuffle
3147}
3148
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 1, which takes byte 14.
; Result bytes 16-31 are byte 16 except result byte 30, which takes byte 30.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3149define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3150; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3151; AVX1:       # %bb.0:
3152; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3153; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3154; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
3155; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3156; AVX1-NEXT:    retq
3157;
3158; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3159; AVX2OR512VL:       # %bb.0:
3160; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
3161; AVX2OR512VL-NEXT:    retq
3162;
3163; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3164; XOPAVX1:       # %bb.0:
3165; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3166; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3167; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
3168; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3169; XOPAVX1-NEXT:    retq
3170;
3171; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3172; XOPAVX2:       # %bb.0:
3173; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
3174; XOPAVX2-NEXT:    retq
3175  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3176  ret <32 x i8> %shuffle
3177}
3178
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 are byte 0 of %a except result byte 0, which takes byte 15.
; Result bytes 16-31 are byte 16 except result byte 31, which takes byte 31.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3179define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) {
3180; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3181; AVX1:       # %bb.0:
3182; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3183; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3184; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
3185; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3186; AVX1-NEXT:    retq
3187;
3188; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3189; AVX2OR512VL:       # %bb.0:
3190; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
3191; AVX2OR512VL-NEXT:    retq
3192;
3193; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3194; XOPAVX1:       # %bb.0:
3195; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3196; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3197; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
3198; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3199; XOPAVX1-NEXT:    retq
3200;
3201; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3202; XOPAVX2:       # %bb.0:
3203; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
3204; XOPAVX2-NEXT:    retq
3205  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31>
3206  ret <32 x i8> %shuffle
3207}
3208
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-15 repeat bytes 0, 4, 8 and 12 of %a four times each.
; Result bytes 16-31 repeat bytes 28, 24, 20 and 16 four times each, i.e. the
; same stride walked in descending order within the upper half.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3209define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3210; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3211; AVX1:       # %bb.0:
3212; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3213; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3214; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3215; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3216; AVX1-NEXT:    retq
3217;
3218; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3219; AVX2OR512VL:       # %bb.0:
3220; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
3221; AVX2OR512VL-NEXT:    retq
3222;
3223; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3224; XOPAVX1:       # %bb.0:
3225; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3226; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3227; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3228; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3229; XOPAVX1-NEXT:    retq
3230;
3231; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3232; XOPAVX2:       # %bb.0:
3233; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
3234; XOPAVX2-NEXT:    retq
3235  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
3236  ret <32 x i8> %shuffle
3237}
3238
; Single-input shuffle (every mask index is below 32, so %b is never selected).
; Result bytes 0-7 are byte 8 of %a and result bytes 8-15 are byte 0.
; Result bytes 16-23 are byte 16 and result bytes 24-31 are byte 24.
; The checks expect one 256-bit vpshufb under the AVX2OR512VL and XOPAVX2
; prefixes, and under AVX1 and XOPAVX1 one 128-bit vpshufb per half around
; vextractf128 and vinsertf128.
3239define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
3240; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3241; AVX1:       # %bb.0:
3242; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
3243; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3244; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3245; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3246; AVX1-NEXT:    retq
3247;
3248; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3249; AVX2OR512VL:       # %bb.0:
3250; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3251; AVX2OR512VL-NEXT:    retq
3252;
3253; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3254; XOPAVX1:       # %bb.0:
3255; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
3256; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3257; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3258; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3259; XOPAVX1-NEXT:    retq
3260;
3261; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3262; XOPAVX2:       # %bb.0:
3263; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3264; XOPAVX2-NEXT:    retq
3265  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
3266  ret <32 x i8> %shuffle
3267}
3268
; Single-input shuffle with poison mask elements (spelled uu in the function
; name and printed as u in the expected shuffle comments). %b is never
; selected. In the lower half only result byte 0 is defined, and it takes
; byte 0 of %a. The defined bytes of the upper half take byte 16, except
; result byte 30, which takes byte 30.
; Under AVX1 and XOPAVX1 the checks only vpshufb the extracted upper half and
; vinsertf128 it back over ymm0, leaving the lower half of ymm0 as it was.
; The AVX2OR512VL and XOPAVX2 checks expect one 256-bit vpshufb.
3269define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3270; AVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3271; AVX1:       # %bb.0:
3272; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
3273; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
3274; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3275; AVX1-NEXT:    retq
3276;
3277; AVX2OR512VL-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3278; AVX2OR512VL:       # %bb.0:
3279; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
3280; AVX2OR512VL-NEXT:    retq
3281;
3282; XOPAVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3283; XOPAVX1:       # %bb.0:
3284; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
3285; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
3286; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3287; XOPAVX1-NEXT:    retq
3288;
3289; XOPAVX2-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3290; XOPAVX2:       # %bb.0:
3291; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
3292; XOPAVX2-NEXT:    retq
3293  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3294  ret <32 x i8> %shuffle
3295}
3296
; Single-input shuffle with poison mask elements (spelled uu in the function
; name and printed as u in the expected shuffle comments). %b is never
; selected. In the lower half, result byte 1 takes byte 14 of %a and result
; bytes 4-15 take byte 0. The defined bytes of the upper half take byte 16,
; except result byte 30, which takes byte 30.
; Under AVX1 and XOPAVX1 the expected lower-half vpshufb shows concrete
; indices (14,14,1,1) in result bytes 0-3, where the IR mask has poison in
; bytes 0, 2 and 3. The AVX2OR512VL and XOPAVX2 checks expect one 256-bit
; vpshufb that keeps those positions as u.
3297define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3298; AVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3299; AVX1:       # %bb.0:
3300; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
3301; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3302; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
3303; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3304; AVX1-NEXT:    retq
3305;
3306; AVX2OR512VL-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3307; AVX2OR512VL:       # %bb.0:
3308; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
3309; AVX2OR512VL-NEXT:    retq
3310;
3311; XOPAVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3312; XOPAVX1:       # %bb.0:
3313; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
3314; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3315; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
3316; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3317; XOPAVX1-NEXT:    retq
3318;
3319; XOPAVX2-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3320; XOPAVX2:       # %bb.0:
3321; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
3322; XOPAVX2-NEXT:    retq
3323  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 14, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 poison, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3324  ret <32 x i8> %shuffle
3325}
3326
; Single-source shuffle of %a with poison lanes: the low half repeats bytes
; 0/4/8/12 and the high half repeats bytes 28/24/20/16 (the reverse order),
; with no index crossing a 128-bit half. The AVX-only expectations fill the
; poison lanes so each half becomes a 4x splat pattern handled by an xmm
; vpshufb; the 256-bit integer expectations keep the lanes undefined in a
; single ymm vpshufb.
3327define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3328; AVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3329; AVX1:       # %bb.0:
3330; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3331; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3332; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3333; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3334; AVX1-NEXT:    retq
3335;
3336; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3337; AVX2OR512VL:       # %bb.0:
3338; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
3339; AVX2OR512VL-NEXT:    retq
3340;
3341; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3342; XOPAVX1:       # %bb.0:
3343; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3344; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3345; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3346; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3347; XOPAVX1-NEXT:    retq
3348;
3349; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3350; XOPAVX2:       # %bb.0:
3351; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
3352; XOPAVX2-NEXT:    retq
3353  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 4, i32 poison, i32 8, i32 8, i32 8, i32 8, i32 poison, i32 poison, i32 12, i32 poison, i32 28, i32 28, i32 28, i32 28, i32 poison, i32 poison, i32 poison, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
3354  ret <32 x i8> %shuffle
3355}
3356
; Single-source shuffle of %a with long poison runs: byte 8 repeated across the
; first eight lanes, byte 16 in lanes 16-18 and byte 24 in lanes 26-31. The
; AVX-only expectations build the low half with vpunpckhbw + vpshuflw and the
; high half with an xmm vpshufb; the 256-bit integer expectations use a single
; ymm vpshufb.
3357define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
3358; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3359; AVX1:       # %bb.0:
3360; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
3361; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3362; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3363; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8]
3364; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3365; AVX1-NEXT:    retq
3366;
3367; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3368; AVX2OR512VL:       # %bb.0:
3369; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
3370; AVX2OR512VL-NEXT:    retq
3371;
3372; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3373; XOPAVX1:       # %bb.0:
3374; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
3375; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3376; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3377; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8]
3378; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3379; XOPAVX1-NEXT:    retq
3380;
3381; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3382; XOPAVX2:       # %bb.0:
3383; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
3384; XOPAVX2-NEXT:    retq
3385  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
3386  ret <32 x i8> %shuffle
3387}
3388
; Irregular two-source shuffle: the mask mixes bytes of %a (indices 0-31) and
; %b (indices 32-63) and pulls bytes across 128-bit halves of both inputs.
; Expected lowerings, as pinned by the checks below:
;  - AVX-only: per-half vpshufb + vpor to gather each source, then vpblendvb
;    with a constant byte mask to merge the %a and %b contributions.
;  - AVX2 / XOPAVX2: vpshufb + vpermd gathers the %a bytes; two vpshufb around
;    a vpermq half swap, joined by vpor, gather the %b bytes; one ymm vpblendvb
;    merges them.
;  - AVX512 VL+BW: same gathers, but the merge is a k-masked vmovdqu8. The
;    immediate 0x80B270C has bits 2,3,8,9,10,13,16,17,19,27 set, i.e. exactly
;    the result lanes whose mask index is below 32 (taken from %a).
;  - AVX512 VL+VBMI: one vpermi2b with a 32-entry index constant.
;  - XOP on AVX-only: vpperm does each two-register gather in one instruction.
3389define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) {
3390; AVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3391; AVX1:       # %bb.0:
3392; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
3393; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[u,u,4,u,1,6],zero,zero,xmm2[0],zero,xmm2[11,u],zero,zero,zero,zero
3394; AVX1-NEXT:    vpshufb {{.*#+}} xmm4 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
3395; AVX1-NEXT:    vpor %xmm3, %xmm4, %xmm3
3396; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
3397; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm5 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
3398; AVX1-NEXT:    vpshufb {{.*#+}} xmm5 = xmm5[8,6,u,6,u,u,u,u,u,u,u,15,u,u,u,u]
3399; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3400; AVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
3401; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
3402; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
3403; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
3404; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
3405; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
3406; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
3407; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
3408; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
3409; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
3410; AVX1-NEXT:    retq
3411;
3412; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3413; AVX2:       # %bb.0:
3414; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3415; AVX2-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1]
3416; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
3417; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3418; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3419; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3420; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
3421; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3422; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
3423; AVX2-NEXT:    retq
3424;
3425; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3426; AVX512VLBW:       # %bb.0:
3427; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3428; AVX512VLBW-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1]
3429; AVX512VLBW-NEXT:    vpermd %ymm0, %ymm2, %ymm2
3430; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3431; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3432; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3433; AVX512VLBW-NEXT:    vpor %ymm0, %ymm1, %ymm0
3434; AVX512VLBW-NEXT:    movl $134948620, %eax # imm = 0x80B270C
3435; AVX512VLBW-NEXT:    kmovd %eax, %k1
3436; AVX512VLBW-NEXT:    vmovdqu8 %ymm2, %ymm0 {%k1}
3437; AVX512VLBW-NEXT:    retq
3438;
3439; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3440; AVX512VLVBMI:       # %bb.0:
3441; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [10,13,44,45,3,3,28,8,49,54,61,12,1,44,16,19,52,51,20,51,17,22,5,0,16,10,27,39,4,2,4,7]
3442; AVX512VLVBMI-NEXT:    vpermi2b %ymm0, %ymm1, %ymm2
3443; AVX512VLVBMI-NEXT:    vmovdqa %ymm2, %ymm0
3444; AVX512VLVBMI-NEXT:    retq
3445;
3446; XOPAVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3447; XOPAVX1:       # %bb.0:
3448; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
3449; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm3 = xmm1[u,u],xmm2[4],xmm1[u],xmm2[1,6],xmm1[5,0],xmm2[0],xmm1[10],xmm2[11],xmm1[u,4,2,4,7]
3450; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
3451; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm5 = xmm4[4,3,u,3,u,u,u,u,u,u,u],xmm0[7],xmm4[u,u,u,u]
3452; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3453; XOPAVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
3454; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],xmm2[12],xmm1[8,u,u,u,12,1,u],xmm2[0,3]
3455; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],xmm4[1,6,13],xmm0[u,u,12,u,u]
3456; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
3457; XOPAVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
3458; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
3459; XOPAVX1-NEXT:    retq
3460;
3461; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3462; XOPAVX2:       # %bb.0:
3463; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3464; XOPAVX2-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1]
3465; XOPAVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
3466; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3467; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3468; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3469; XOPAVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
3470; XOPAVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3471; XOPAVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
3472; XOPAVX2-NEXT:    retq
3473  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39>
3474  ret <32 x i8> %shuffle
3475}
3476
; Two-source shuffle where the result's low half splats bytes 0 and 8 of %a and
; the high half splats bytes 0 and 8 of %b (mask indices 32 and 40), i.e. the
; same 8x/8x pattern applied to the low 128 bits of each input. Expected
; lowerings: AVX-only reuses one xmm vpshufb control for both inputs before
; vinsertf128; the 256-bit integer configurations first concatenate the two
; low halves with vinserti128 and then apply one ymm vpshufb; VBMI uses a
; single vpermt2b.
3477define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
3478; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3479; AVX1:       # %bb.0:
3480; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3481; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3482; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3483; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3484; AVX1-NEXT:    retq
3485;
3486; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3487; AVX2:       # %bb.0:
3488; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3489; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3490; AVX2-NEXT:    retq
3491;
3492; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3493; AVX512VLBW:       # %bb.0:
3494; AVX512VLBW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3495; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3496; AVX512VLBW-NEXT:    retq
3497;
3498; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3499; AVX512VLVBMI:       # %bb.0:
3500; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40]
3501; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3502; AVX512VLVBMI-NEXT:    retq
3503;
3504; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3505; XOPAVX1:       # %bb.0:
3506; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3507; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3508; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3509; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3510; XOPAVX1-NEXT:    retq
3511;
3512; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3513; XOPAVX2:       # %bb.0:
3514; XOPAVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3515; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3516; XOPAVX2-NEXT:    retq
3517  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
3518  ret <32 x i8> %shuffle
3519}
3520
; Same 8x/8x splat pattern, but sourced from the HIGH 128 bits of %a (bytes 16
; and 24) for the result's low half and the LOW 128 bits of %b (mask indices 32
; and 40) for the result's high half. Expected lowerings: AVX-only extracts the
; upper half of %a and shares one xmm vpshufb control; the 256-bit integer
; configurations pick the two halves with vperm2i128 and then apply one ymm
; vpshufb; VBMI uses a single vpermt2b.
3521define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
3522; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3523; AVX1:       # %bb.0:
3524; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3525; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3526; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3527; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3528; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3529; AVX1-NEXT:    retq
3530;
3531; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3532; AVX2:       # %bb.0:
3533; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3534; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3535; AVX2-NEXT:    retq
3536;
3537; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3538; AVX512VLBW:       # %bb.0:
3539; AVX512VLBW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3540; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3541; AVX512VLBW-NEXT:    retq
3542;
3543; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3544; AVX512VLVBMI:       # %bb.0:
3545; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40]
3546; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3547; AVX512VLVBMI-NEXT:    retq
3548;
3549; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3550; XOPAVX1:       # %bb.0:
3551; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3552; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3553; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3554; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3555; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3556; XOPAVX1-NEXT:    retq
3557;
3558; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3559; XOPAVX2:       # %bb.0:
3560; XOPAVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3561; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3562; XOPAVX2-NEXT:    retq
3563  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
3564  ret <32 x i8> %shuffle
3565}
3566
; Same 8x/8x splat pattern, sourced from the HIGH 128 bits of both inputs:
; bytes 16 and 24 of %a for the result's low half, bytes 16 and 24 of %b (mask
; indices 48 and 56) for the high half. Expected lowerings: AVX-only extracts
; both upper halves and shares one xmm vpshufb control; the 256-bit integer
; configurations combine the two upper halves with vperm2i128 and then apply
; one ymm vpshufb; VBMI uses a single vpermt2b.
3567define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
3568; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3569; AVX1:       # %bb.0:
3570; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
3571; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3572; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3573; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3574; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3575; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3576; AVX1-NEXT:    retq
3577;
3578; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3579; AVX2:       # %bb.0:
3580; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3581; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3582; AVX2-NEXT:    retq
3583;
3584; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3585; AVX512VLBW:       # %bb.0:
3586; AVX512VLBW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3587; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3588; AVX512VLBW-NEXT:    retq
3589;
3590; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3591; AVX512VLVBMI:       # %bb.0:
3592; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56]
3593; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3594; AVX512VLVBMI-NEXT:    retq
3595;
3596; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3597; XOPAVX1:       # %bb.0:
3598; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
3599; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3600; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3601; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3602; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3603; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3604; XOPAVX1-NEXT:    retq
3605;
3606; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3607; XOPAVX2:       # %bb.0:
3608; XOPAVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3609; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3610; XOPAVX2-NEXT:    retq
3611  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
3612  ret <32 x i8> %shuffle
3613}
3614
; Same 8x/8x splat pattern where each result half already comes from the
; matching half of its source: bytes 0 and 8 of %a for the low half, bytes 16
; and 24 of %b (mask indices 48 and 56) for the high half. Because no half has
; to move, the 256-bit integer configurations are expected to merge the inputs
; with a vpblendd before the ymm vpshufb; AVX-only extracts the upper half of
; %b and shares one xmm vpshufb control; VBMI uses a single vpermt2b.
3615define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
3616; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3617; AVX1:       # %bb.0:
3618; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
3619; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3620; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3621; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3622; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3623; AVX1-NEXT:    retq
3624;
3625; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3626; AVX2:       # %bb.0:
3627; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3628; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3629; AVX2-NEXT:    retq
3630;
3631; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3632; AVX512VLBW:       # %bb.0:
3633; AVX512VLBW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3634; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3635; AVX512VLBW-NEXT:    retq
3636;
3637; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3638; AVX512VLVBMI:       # %bb.0:
3639; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56]
3640; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3641; AVX512VLVBMI-NEXT:    retq
3642;
3643; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3644; XOPAVX1:       # %bb.0:
3645; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
3646; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3647; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
3648; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
3649; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3650; XOPAVX1-NEXT:    retq
3651;
3652; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3653; XOPAVX2:       # %bb.0:
3654; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3655; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3656; XOPAVX2-NEXT:    retq
3657  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
3658  ret <32 x i8> %shuffle
3659}
3660
; Full byte interleave of the LOW 128 bits of %a and %b (a0,b0,a1,b1,...,
; a15,b15) into a 256-bit result; the upper halves of both inputs are unused.
; Every non-VBMI configuration is expected to form the two result halves with
; xmm vpunpcklbw / vpunpckhbw and join them with a 128-bit insert (vinsertf128
; on the AVX-only runs, vinserti128 otherwise); VBMI uses a single vpermt2b.
3661define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47(<32 x i8> %a, <32 x i8> %b) {
3662; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3663; AVX1:       # %bb.0:
3664; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3665; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3666; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
3667; AVX1-NEXT:    retq
3668;
3669; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3670; AVX2:       # %bb.0:
3671; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3672; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3673; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
3674; AVX2-NEXT:    retq
3675;
3676; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3677; AVX512VLBW:       # %bb.0:
3678; AVX512VLBW-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3679; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3680; AVX512VLBW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
3681; AVX512VLBW-NEXT:    retq
3682;
3683; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3684; AVX512VLVBMI:       # %bb.0:
3685; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47]
3686; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3687; AVX512VLVBMI-NEXT:    retq
3688;
3689; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3690; XOPAVX1:       # %bb.0:
3691; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3692; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3693; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
3694; XOPAVX1-NEXT:    retq
3695;
3696; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3697; XOPAVX2:       # %bb.0:
3698; XOPAVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3699; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3700; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
3701; XOPAVX2-NEXT:    retq
3702  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
3703  ret <32 x i8> %shuffle
3704}
3705
; Result low half = the low 16 bytes of %a in reverse order; result high half =
; the low 16 bytes of %b de-interleaved (even bytes first, then odd bytes).
; Only the low 128 bits of each input are read. Expected lowerings: AVX-only
; runs one xmm vpshufb per input and joins with vinsertf128; the 256-bit
; integer configurations concatenate the low halves with vinserti128 and apply
; one ymm vpshufb; VBMI uses a single vpermt2b.
3706define <32 x i8> @shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47(<32 x i8> %a, <32 x i8> %b) {
3707; AVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3708; AVX1:       # %bb.0:
3709; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
3710; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
3711; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3712; AVX1-NEXT:    retq
3713;
3714; AVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3715; AVX2:       # %bb.0:
3716; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3717; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3718; AVX2-NEXT:    retq
3719;
3720; AVX512VLBW-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3721; AVX512VLBW:       # %bb.0:
3722; AVX512VLBW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3723; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3724; AVX512VLBW-NEXT:    retq
3725;
3726; AVX512VLVBMI-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3727; AVX512VLVBMI:       # %bb.0:
3728; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,32,34,36,38,40,42,44,46,33,35,37,39,41,43,45,47]
3729; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
3730; AVX512VLVBMI-NEXT:    retq
3731;
3732; XOPAVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3733; XOPAVX1:       # %bb.0:
3734; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
3735; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
3736; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
3737; XOPAVX1-NEXT:    retq
3738;
3739; XOPAVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3740; XOPAVX2:       # %bb.0:
3741; XOPAVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3742; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3743; XOPAVX2-NEXT:    retq
3744  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47>
3745  ret <32 x i8> %shuffle
3746}
3747
; Builds a zero vector with %a[0] in byte 15 and %a[16] in byte 31, i.e. the
; lowest byte of each 128-bit lane of %a moved to that lane's highest byte.
; The checks expect a per-lane byte shift left (vpslldq); the AVX1 and XOPAVX1
; checks do it on each xmm half and recombine with vinsertf128.
3748define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48(<32 x i8> %a) {
3749; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3750; AVX1:       # %bb.0:
3751; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3752; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3753; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3754; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3755; AVX1-NEXT:    retq
3756;
3757; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3758; AVX2OR512VL:       # %bb.0:
3759; AVX2OR512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
3760; AVX2OR512VL-NEXT:    retq
3761;
3762; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3763; XOPAVX1:       # %bb.0:
3764; XOPAVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3765; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3766; XOPAVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3767; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3768; XOPAVX1-NEXT:    retq
3769;
3770; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3771; XOPAVX2:       # %bb.0:
3772; XOPAVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
3773; XOPAVX2-NEXT:    retq
3774  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 48>
3775  ret <32 x i8> %shuffle
3776}
3777
; Builds a zero vector with %a[15] in byte 0 and %a[31] in byte 16, i.e. the
; highest byte of each 128-bit lane of %a moved to that lane's lowest byte.
; The checks expect a per-lane byte shift right (vpsrldq); the AVX1 and XOPAVX1
; checks do it on each xmm half and recombine with vinsertf128.
3778define <32 x i8> @shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3779; AVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3780; AVX1:       # %bb.0:
3781; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3782; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3783; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3784; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3785; AVX1-NEXT:    retq
3786;
3787; AVX2OR512VL-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3788; AVX2OR512VL:       # %bb.0:
3789; AVX2OR512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3790; AVX2OR512VL-NEXT:    retq
3791;
3792; XOPAVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3793; XOPAVX1:       # %bb.0:
3794; XOPAVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3795; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3796; XOPAVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3797; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3798; XOPAVX1-NEXT:    retq
3799;
3800; XOPAVX2-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3801; XOPAVX2:       # %bb.0:
3802; XOPAVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3803; XOPAVX2-NEXT:    retq
3804  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 63, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
3805  ret <32 x i8> %shuffle
3806}
3807
3808;
3809; Shuffle to logical bit shifts
3810;
3811
; Every even byte of the result is zero and every odd byte 2k+1 is %a[2k]
; (index 32 selects element 0 of the zeroinitializer operand).
; The checks expect this to be emitted as a left shift of each 16-bit element
; by 8 bits (vpsllw $8), done per xmm half in the AVX1 and XOPAVX1 checks.
3812define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) {
3813; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3814; AVX1:       # %bb.0:
3815; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3816; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3817; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm0
3818; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3819; AVX1-NEXT:    retq
3820;
3821; AVX2OR512VL-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3822; AVX2OR512VL:       # %bb.0:
3823; AVX2OR512VL-NEXT:    vpsllw $8, %ymm0, %ymm0
3824; AVX2OR512VL-NEXT:    retq
3825;
3826; XOPAVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3827; XOPAVX1:       # %bb.0:
3828; XOPAVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3829; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3830; XOPAVX1-NEXT:    vpsllw $8, %xmm0, %xmm0
3831; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3832; XOPAVX1-NEXT:    retq
3833;
3834; XOPAVX2-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3835; XOPAVX2:       # %bb.0:
3836; XOPAVX2-NEXT:    vpsllw $8, %ymm0, %ymm0
3837; XOPAVX2-NEXT:    retq
3838  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
3839  ret <32 x i8> %shuffle
3840}
3841
; Within every group of four bytes, the low two bytes of the result are zero
; and the high two bytes are the low two bytes of the same group of %a.
; The checks expect this to be emitted as a left shift of each 32-bit element
; by 16 bits (vpslld $16), done per xmm half in the AVX1 and XOPAVX1 checks.
3842define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) {
3843; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3844; AVX1:       # %bb.0:
3845; AVX1-NEXT:    vpslld $16, %xmm0, %xmm1
3846; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3847; AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
3848; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3849; AVX1-NEXT:    retq
3850;
3851; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3852; AVX2OR512VL:       # %bb.0:
3853; AVX2OR512VL-NEXT:    vpslld $16, %ymm0, %ymm0
3854; AVX2OR512VL-NEXT:    retq
3855;
3856; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3857; XOPAVX1:       # %bb.0:
3858; XOPAVX1-NEXT:    vpslld $16, %xmm0, %xmm1
3859; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3860; XOPAVX1-NEXT:    vpslld $16, %xmm0, %xmm0
3861; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3862; XOPAVX1-NEXT:    retq
3863;
3864; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3865; XOPAVX2:       # %bb.0:
3866; XOPAVX2-NEXT:    vpslld $16, %ymm0, %ymm0
3867; XOPAVX2-NEXT:    retq
3868  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29>
3869  ret <32 x i8> %shuffle
3870}
3871
; Within every group of eight bytes, the low six bytes of the result are zero
; and the high two bytes are the low two bytes of the same group of %a.
; The checks expect this to be emitted as a left shift of each 64-bit element
; by 48 bits (vpsllq $48), done per xmm half in the AVX1 and XOPAVX1 checks.
3872define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) {
3873; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3874; AVX1:       # %bb.0:
3875; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
3876; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3877; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
3878; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3879; AVX1-NEXT:    retq
3880;
3881; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3882; AVX2OR512VL:       # %bb.0:
3883; AVX2OR512VL-NEXT:    vpsllq $48, %ymm0, %ymm0
3884; AVX2OR512VL-NEXT:    retq
3885;
3886; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3887; XOPAVX1:       # %bb.0:
3888; XOPAVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
3889; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3890; XOPAVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
3891; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3892; XOPAVX1-NEXT:    retq
3893;
3894; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3895; XOPAVX2:       # %bb.0:
3896; XOPAVX2-NEXT:    vpsllq $48, %ymm0, %ymm0
3897; XOPAVX2-NEXT:    retq
3898  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25>
3899  ret <32 x i8> %shuffle
3900}
3901
; Every even byte 2k of the result is %a[2k+1] and every odd byte is zero
; (index 32 selects element 0 of the zeroinitializer operand).
; The checks expect this to be emitted as a logical right shift of each 16-bit
; element by 8 bits (vpsrlw $8), done per xmm half in the AVX1 and XOPAVX1
; checks.
3902define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) {
3903; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3904; AVX1:       # %bb.0:
3905; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
3906; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3907; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3908; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3909; AVX1-NEXT:    retq
3910;
3911; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3912; AVX2OR512VL:       # %bb.0:
3913; AVX2OR512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
3914; AVX2OR512VL-NEXT:    retq
3915;
3916; XOPAVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3917; XOPAVX1:       # %bb.0:
3918; XOPAVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
3919; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3920; XOPAVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3921; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3922; XOPAVX1-NEXT:    retq
3923;
3924; XOPAVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3925; XOPAVX2:       # %bb.0:
3926; XOPAVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
3927; XOPAVX2-NEXT:    retq
3928  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
3929  ret <32 x i8> %shuffle
3930}
3931
; Within every group of four bytes, the low two bytes of the result are the
; high two bytes of the same group of %a and the high two bytes are zero.
; The checks expect this to be emitted as a logical right shift of each 32-bit
; element by 16 bits (vpsrld $16), done per xmm half in the AVX1 and XOPAVX1
; checks.
3932define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) {
3933; AVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3934; AVX1:       # %bb.0:
3935; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
3936; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3937; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
3938; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3939; AVX1-NEXT:    retq
3940;
3941; AVX2OR512VL-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3942; AVX2OR512VL:       # %bb.0:
3943; AVX2OR512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
3944; AVX2OR512VL-NEXT:    retq
3945;
3946; XOPAVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3947; XOPAVX1:       # %bb.0:
3948; XOPAVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
3949; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3950; XOPAVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
3951; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3952; XOPAVX1-NEXT:    retq
3953;
3954; XOPAVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3955; XOPAVX2:       # %bb.0:
3956; XOPAVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
3957; XOPAVX2-NEXT:    retq
3958  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32>
3959  ret <32 x i8> %shuffle
3960}
3961
; Within every group of eight bytes, the lowest byte of the result is the
; highest byte of the same group of %a and the other seven bytes are zero.
; The checks expect this to be emitted as a logical right shift of each 64-bit
; element by 56 bits (vpsrlq $56), done per xmm half in the AVX1 and XOPAVX1
; checks.
; NOTE(review): the function name spells one element `z` instead of `zz`; the
; mask is zero (index 32) at that position. The name is left untouched because
; the -LABEL checks below match it verbatim.
3962define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3963; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3964; AVX1:       # %bb.0:
3965; AVX1-NEXT:    vpsrlq $56, %xmm0, %xmm1
3966; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3967; AVX1-NEXT:    vpsrlq $56, %xmm0, %xmm0
3968; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3969; AVX1-NEXT:    retq
3970;
3971; AVX2OR512VL-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3972; AVX2OR512VL:       # %bb.0:
3973; AVX2OR512VL-NEXT:    vpsrlq $56, %ymm0, %ymm0
3974; AVX2OR512VL-NEXT:    retq
3975;
3976; XOPAVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3977; XOPAVX1:       # %bb.0:
3978; XOPAVX1-NEXT:    vpsrlq $56, %xmm0, %xmm1
3979; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
3980; XOPAVX1-NEXT:    vpsrlq $56, %xmm0, %xmm0
3981; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3982; XOPAVX1-NEXT:    retq
3983;
3984; XOPAVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3985; XOPAVX2:       # %bb.0:
3986; XOPAVX2-NEXT:    vpsrlq $56, %ymm0, %ymm0
3987; XOPAVX2-NEXT:    retq
3988  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
3989  ret <32 x i8> %shuffle
3990}
3991
; Places %a[0..3] at result bytes 0, 8, 16 and 24 with zeros everywhere else
; (operand order is zeroinitializer, %a, so indices 32..35 select %a[0..3]).
; The checks expect a byte-to-qword zero extension (vpmovzxbq). The AVX1 and
; XOPAVX1 checks extend two bytes per xmm half, using vpsrld $16 to bring
; bytes 2 and 3 down before the second extension.
3992define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3993; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
3994; AVX1:       # %bb.0:
3995; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3996; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
3997; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3998; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3999; AVX1-NEXT:    retq
4000;
4001; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
4002; AVX2OR512VL:       # %bb.0:
4003; AVX2OR512VL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
4004; AVX2OR512VL-NEXT:    retq
4005;
4006; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
4007; XOPAVX1:       # %bb.0:
4008; XOPAVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
4009; XOPAVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
4010; XOPAVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
4011; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4012; XOPAVX1-NEXT:    retq
4013;
4014; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
4015; XOPAVX2:       # %bb.0:
4016; XOPAVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
4017; XOPAVX2-NEXT:    retq
4018  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4019  ret <32 x i8> %shuffle
4020}
4021
; Places %a[0..7] at every fourth result byte (0, 4, ..., 28) with zeros
; everywhere else (operand order is zeroinitializer, %a, so indices 32..39
; select %a[0..7]).
; The checks expect a byte-to-dword zero extension (vpmovzxbd). The AVX1 and
; XOPAVX1 checks extend four bytes per xmm half, using vpshufd [1,1,1,1] to
; bring bytes 4..7 down before the second extension.
4022define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i8> %a) {
4023; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4024; AVX1:       # %bb.0:
4025; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4026; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
4027; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4028; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4029; AVX1-NEXT:    retq
4030;
4031; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4032; AVX2OR512VL:       # %bb.0:
4033; AVX2OR512VL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4034; AVX2OR512VL-NEXT:    retq
4035;
4036; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4037; XOPAVX1:       # %bb.0:
4038; XOPAVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4039; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
4040; XOPAVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4041; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4042; XOPAVX1-NEXT:    retq
4043;
4044; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4045; XOPAVX2:       # %bb.0:
4046; XOPAVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4047; XOPAVX2-NEXT:    retq
4048  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
4049  ret <32 x i8> %shuffle
4050}
4051
; Places %a[0..15] at the even result bytes with zeros at the odd bytes
; (operand order is zeroinitializer, %a, so indices 32..47 select %a[0..15]).
; The checks expect a byte-to-word zero extension (vpmovzxbw). The AVX1 and
; XOPAVX1 checks build the upper half by interleaving bytes 8..15 with a
; zeroed register (vpxor + vpunpckhbw) and the lower half with vpmovzxbw.
4052define <32 x i8> @shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i8> %a) {
4053; AVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4054; AVX1:       # %bb.0:
4055; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4056; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
4057; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4058; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4059; AVX1-NEXT:    retq
4060;
4061; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4062; AVX2OR512VL:       # %bb.0:
4063; AVX2OR512VL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
4064; AVX2OR512VL-NEXT:    retq
4065;
4066; XOPAVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4067; XOPAVX1:       # %bb.0:
4068; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4069; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
4070; XOPAVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4071; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4072; XOPAVX1-NEXT:    retq
4073;
4074; XOPAVX2-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4075; XOPAVX2:       # %bb.0:
4076; XOPAVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
4077; XOPAVX2-NEXT:    retq
4078  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
4079  ret <32 x i8> %shuffle
4080}
4081
; Places %a[24..31] (the top eight bytes of %a, mask indices 56..63) at every
; fourth result byte; all other mask indices are below 32 and therefore select
; elements of the zeroinitializer first operand.
; Unlike the extensions from the low bytes of %a, the source bytes sit in the
; upper 128-bit lane, and the checks differ per configuration: extract + vpshufd
; + vpmovzxbd for the AVX1 and XOPAVX1 prefixes, vpermq + vpshufb for the AVX2,
; AVX512VLBW and XOPAVX2 prefixes, and a single vpermt2b against a zeroed
; register for the AVX512VLVBMI prefix.
; NOTE(review): the function name contains a stray `__zz` component (33 name
; components for 32 elements). It is left untouched because the -LABEL checks
; below match the name verbatim.
4082define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz(<32 x i8> %a) {
4083; AVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4084; AVX1:       # %bb.0:
4085; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4086; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4087; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
4088; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
4089; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4090; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4091; AVX1-NEXT:    retq
4092;
4093; AVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4094; AVX2:       # %bb.0:
4095; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
4096; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4097; AVX2-NEXT:    retq
4098;
4099; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4100; AVX512VLBW:       # %bb.0:
4101; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
4102; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4103; AVX512VLBW-NEXT:    retq
4104;
4105; AVX512VLVBMI-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4106; AVX512VLVBMI:       # %bb.0:
4107; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [56,1,2,3,57,5,6,7,58,9,10,11,59,13,14,15,60,17,18,19,61,21,22,23,62,25,26,27,63,29,30,31]
4108; AVX512VLVBMI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4109; AVX512VLVBMI-NEXT:    vpermt2b %ymm0, %ymm2, %ymm1
4110; AVX512VLVBMI-NEXT:    vmovdqa %ymm1, %ymm0
4111; AVX512VLVBMI-NEXT:    retq
4112;
4113; XOPAVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4114; XOPAVX1:       # %bb.0:
4115; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4116; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4117; XOPAVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
4118; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
4119; XOPAVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4120; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4121; XOPAVX1-NEXT:    retq
4122;
4123; XOPAVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4124; XOPAVX2:       # %bb.0:
4125; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
4126; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4127; XOPAVX2-NEXT:    retq
4128  %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 56, i32 1, i32 2, i32 3, i32 57, i32 5, i32 6, i32 7, i32 58, i32 9, i32 10, i32 11, i32 59, i32 13, i32 14, i32 15, i32 60, i32 17, i32 18, i32 19, i32 61, i32 21, i32 22, i32 23, i32 62, i32 25, i32 26, i32 27, i32 63, i32 29, i32 30, i32 31>
4129  ret <32 x i8> %shuffle
4130}
4131
4132; PR121823
; Applies the same 12-byte permutation (1,9,0,3,11,2,5,13,4,7,15,6) to each
; 128-bit lane of %a, packs the two 12-byte results contiguously into result
; bytes 0..23, and zeroes bytes 24..31 (mask indices 32 and 48 select elements
; of the zeroinitializer operand).
; Because the second 12-byte group straddles the 128-bit lane boundary, the
; 256-bit checks pair an in-lane vpshufb with a cross-lane dword permute
; (vpermd + vpblendd, or vpermi2d), while the AVX512VLVBMI prefix expects a
; single vpermt2b.
4133define <32 x i8> @shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a)  {
4134; AVX1-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4135; AVX1:       # %bb.0:
4136; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4137; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[1,9,0,3]
4138; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,9,0,3,11,2,5,13,4,7,15,6],zero,zero,zero,zero
4139; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
4140; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[11,2,5,13,4,7,15,6],zero,zero,zero,zero,zero,zero,zero,zero
4141; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4142; AVX1-NEXT:    retq
4143;
4144; AVX2-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4145; AVX2:       # %bb.0:
4146; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u]
4147; AVX2-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,0,0]
4148; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
4149; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4150; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
4151; AVX2-NEXT:    retq
4152;
4153; AVX512VLBW-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4154; AVX512VLBW:       # %bb.0:
4155; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u]
4156; AVX512VLBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
4157; AVX512VLBW-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [0,1,2,4,5,6,14,15]
4158; AVX512VLBW-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
4159; AVX512VLBW-NEXT:    retq
4160;
4161; AVX512VLVBMI-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4162; AVX512VLVBMI:       # %bb.0:
4163; AVX512VLVBMI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4164; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,9,0,3,11,2,5,13,4,7,15,6,17,25,16,19,27,18,21,29,20,23,31,22,56,57,58,59,60,61,62,63]
4165; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
4166; AVX512VLVBMI-NEXT:    retq
4167;
4168; XOPAVX1-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4169; XOPAVX1:       # %bb.0:
4170; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4171; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[1,9,0,3,11,2,5,13,4,7,15,6],xmm1[1,9,0,3]
4172; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[11,2,5,13,4,7,15,6],zero,zero,zero,zero,zero,zero,zero,zero
4173; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4174; XOPAVX1-NEXT:    retq
4175;
4176; XOPAVX2-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz:
4177; XOPAVX2:       # %bb.0:
4178; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u]
4179; XOPAVX2-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,0,0]
4180; XOPAVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
4181; XOPAVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4182; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
4183; XOPAVX2-NEXT:    retq
4184  %r = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 0, i32 3, i32 11, i32 2, i32 5, i32 13, i32 4, i32 7, i32 15, i32 6, i32 17, i32 25, i32 16, i32 19, i32 27, i32 18, i32 21, i32 29, i32 20, i32 23, i32 31, i32 22, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48>
4185  ret <32 x i8> %r
4186}
4187
; Two-input shuffle. Each 128-bit lane of the result is the last byte of the
; matching lane of %b (mask indices 47 and 63) followed by the first 15 bytes
; of the matching lane of %a.
; The checks expect a per-lane byte alignment of the two sources (vpalignr);
; the AVX1 and XOPAVX1 checks extract the upper halves, align each xmm pair and
; recombine with vinsertf128.
4188define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4189; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4190; AVX1:       # %bb.0:
4191; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4192; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
4193; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4194; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4195; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4196; AVX1-NEXT:    retq
4197;
4198; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4199; AVX2OR512VL:       # %bb.0:
4200; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4201; AVX2OR512VL-NEXT:    retq
4202;
4203; XOPAVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4204; XOPAVX1:       # %bb.0:
4205; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4206; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
4207; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4208; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4209; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4210; XOPAVX1-NEXT:    retq
4211;
4212; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4213; XOPAVX2:       # %bb.0:
4214; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4215; XOPAVX2-NEXT:    retq
4216  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4217  ret <32 x i8> %shuffle
4218}
4219
; Same per-lane rotate mask as <47,0..14,63,16..30>, but result element 0 is
; poison, so the low lane needs no byte from %b. The 256-bit-integer prefixes
; still expect the full ymm vpalignr; AVX1/XOPAVX1 expect a vpslldq for the low
; lane and a vpalignr for the high lane.
4220define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4221; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4222; AVX1:       # %bb.0:
4223; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4224; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4225; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4226; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4227; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4228; AVX1-NEXT:    retq
4229;
4230; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4231; AVX2OR512VL:       # %bb.0:
4232; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4233; AVX2OR512VL-NEXT:    retq
4234;
4235; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4236; XOPAVX1:       # %bb.0:
4237; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4238; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4239; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4240; XOPAVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4241; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4242; XOPAVX1-NEXT:    retq
4243;
4244; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4245; XOPAVX2:       # %bb.0:
4246; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4247; XOPAVX2-NEXT:    retq
4248  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4249  ret <32 x i8> %shuffle
4250}
4251
; Same per-lane rotate mask as <47,0..14,63,16..30>, but result element 16 is
; poison, so the high lane needs no byte from %b. The 256-bit-integer prefixes
; still expect the full ymm vpalignr; AVX1/XOPAVX1 expect a vpalignr for the low
; lane and a vpslldq for the high lane.
4252define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4253; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4254; AVX1:       # %bb.0:
4255; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4256; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4257; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4258; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4259; AVX1-NEXT:    retq
4260;
4261; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4262; AVX2OR512VL:       # %bb.0:
4263; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4264; AVX2OR512VL-NEXT:    retq
4265;
4266; XOPAVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4267; XOPAVX1:       # %bb.0:
4268; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4269; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4270; XOPAVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4271; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4272; XOPAVX1-NEXT:    retq
4273;
4274; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4275; XOPAVX2:       # %bb.0:
4276; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4277; XOPAVX2-NEXT:    retq
4278  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4279  ret <32 x i8> %shuffle
4280}
4281
; Sparse variant of the per-lane rotate mask: the low lane defines only %a bytes
; 0-14 (element 0 is poison) and the high lane defines only its first byte
; (mask index 63, byte 31 of %b). The 256-bit-integer prefixes expect the
; poison elements to be filled in so one ymm vpalignr matches; AVX1/XOPAVX1
; expect a vpslldq / vpsrldq pair instead.
4282define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4283; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4284; AVX1:       # %bb.0:
4285; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4286; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4287; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
4288; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4289; AVX1-NEXT:    retq
4290;
4291; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4292; AVX2OR512VL:       # %bb.0:
4293; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4294; AVX2OR512VL-NEXT:    retq
4295;
4296; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4297; XOPAVX1:       # %bb.0:
4298; XOPAVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4299; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4300; XOPAVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
4301; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4302; XOPAVX1-NEXT:    retq
4303;
4304; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4305; XOPAVX2:       # %bb.0:
4306; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4307; XOPAVX2-NEXT:    retq
4308  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
4309  ret <32 x i8> %shuffle
4310}
4311
; Only the high 128-bit lane is defined: byte 31 of %b (mask index 63) followed
; by bytes 16-30 of %a; the whole low lane is poison. The 256-bit-integer
; prefixes expect one ymm vpalignr; AVX1/XOPAVX1 expect a single xmm vpalignr on
; the extracted high halves, re-inserted with vinsertf128.
4312define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4313; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4314; AVX1:       # %bb.0:
4315; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4316; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4317; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4318; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4319; AVX1-NEXT:    retq
4320;
4321; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4322; AVX2OR512VL:       # %bb.0:
4323; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4324; AVX2OR512VL-NEXT:    retq
4325;
4326; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4327; XOPAVX1:       # %bb.0:
4328; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
4329; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4330; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4331; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4332; XOPAVX1-NEXT:    retq
4333;
4334; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4335; XOPAVX2:       # %bb.0:
4336; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4337; XOPAVX2-NEXT:    retq
4338  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4339  ret <32 x i8> %shuffle
4340}
4341
; Two-input, per-128-bit-lane byte rotate in the opposite direction: each result
; lane is bytes 1-15 of the matching lane of %a followed by byte 0 of the
; matching lane of %b (mask indices 32 and 48). The 256-bit-integer prefixes
; expect one ymm vpalignr; AVX1/XOPAVX1 expect one xmm vpalignr per lane.
4342define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48(<32 x i8> %a, <32 x i8> %b) {
4343; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4344; AVX1:       # %bb.0:
4345; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4346; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
4347; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4348; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
4349; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4350; AVX1-NEXT:    retq
4351;
4352; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4353; AVX2OR512VL:       # %bb.0:
4354; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
4355; AVX2OR512VL-NEXT:    retq
4356;
4357; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4358; XOPAVX1:       # %bb.0:
4359; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4360; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
4361; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4362; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
4363; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4364; XOPAVX1-NEXT:    retq
4365;
4366; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4367; XOPAVX2:       # %bb.0:
4368; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
4369; XOPAVX2-NEXT:    retq
4370  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
4371  ret <32 x i8> %shuffle
4372}
4373
; Operand-swapped form of the <1..15,32,17..31,48> rotate: each result lane is
; bytes 1-15 of the matching lane of %b (mask indices 33-47 / 49-63) followed by
; byte 0 of the matching lane of %a (mask indices 0 and 16). The checks expect
; the same vpalignr lowering with the two sources exchanged.
4374define <32 x i8> @shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16(<32 x i8> %a, <32 x i8> %b) {
4375; AVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4376; AVX1:       # %bb.0:
4377; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4378; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
4379; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4380; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
4381; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4382; AVX1-NEXT:    retq
4383;
4384; AVX2OR512VL-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4385; AVX2OR512VL:       # %bb.0:
4386; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
4387; AVX2OR512VL-NEXT:    retq
4388;
4389; XOPAVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4390; XOPAVX1:       # %bb.0:
4391; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4392; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
4393; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4394; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
4395; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4396; XOPAVX1-NEXT:    retq
4397;
4398; XOPAVX2-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4399; XOPAVX2:       # %bb.0:
4400; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
4401; XOPAVX2-NEXT:    retq
4402  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 00, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 16>
4403  ret <32 x i8> %shuffle
4404}
4405
; Operand-swapped form of the <47,0..14,63,16..30> rotate: each result lane is
; byte 15 of the matching lane of %a (mask indices 15 and 31) followed by bytes
; 0-14 of the matching lane of %b (mask indices 32-46 / 48-62). The checks
; expect the same vpalignr lowering with the two sources exchanged.
4406define <32 x i8> @shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a, <32 x i8> %b) {
4407; AVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4408; AVX1:       # %bb.0:
4409; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4410; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
4411; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4412; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4413; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4414; AVX1-NEXT:    retq
4415;
4416; AVX2OR512VL-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4417; AVX2OR512VL:       # %bb.0:
4418; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4419; AVX2OR512VL-NEXT:    retq
4420;
4421; XOPAVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4422; XOPAVX1:       # %bb.0:
4423; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4424; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
4425; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4426; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4427; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4428; XOPAVX1-NEXT:    retq
4429;
4430; XOPAVX2-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4431; XOPAVX2:       # %bb.0:
4432; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4433; XOPAVX2-NEXT:    retq
4434  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
4435  ret <32 x i8> %shuffle
4436}
4437
; Single-input per-lane byte rotate: every mask index is below 32, so only %a is
; read. Within each 128-bit lane bytes 1-15 move down one position and byte 0
; wraps around to the end. The checks expect a unary vpalignr (ymm on the
; 256-bit-integer prefixes, one xmm per lane on AVX1/XOPAVX1).
4438define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16(<32 x i8> %a, <32 x i8> %b) {
4439; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4440; AVX1:       # %bb.0:
4441; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4442; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4443; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4444; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4445; AVX1-NEXT:    retq
4446;
4447; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4448; AVX2OR512VL:       # %bb.0:
4449; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16]
4450; AVX2OR512VL-NEXT:    retq
4451;
4452; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4453; XOPAVX1:       # %bb.0:
4454; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4455; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4456; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4457; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4458; XOPAVX1-NEXT:    retq
4459;
4460; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4461; XOPAVX2:       # %bb.0:
4462; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16]
4463; XOPAVX2-NEXT:    retq
4464  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16>
4465  ret <32 x i8> %shuffle
4466}
4467
; Single-input per-lane byte rotate in the other direction: every mask index is
; below 32, so only %a is read. Within each 128-bit lane byte 15 wraps around to
; the front and bytes 0-14 move up one position. The checks expect a unary
; vpalignr (ymm on the 256-bit-integer prefixes, one xmm per lane on
; AVX1/XOPAVX1).
4468define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4469; AVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4470; AVX1:       # %bb.0:
4471; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4472; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4473; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4474; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4475; AVX1-NEXT:    retq
4476;
4477; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4478; AVX2OR512VL:       # %bb.0:
4479; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4480; AVX2OR512VL-NEXT:    retq
4481;
4482; XOPAVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4483; XOPAVX1:       # %bb.0:
4484; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4485; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4486; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4487; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4488; XOPAVX1-NEXT:    retq
4489;
4490; XOPAVX2-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4491; XOPAVX2:       # %bb.0:
4492; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4493; XOPAVX2-NEXT:    retq
4494  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4495  ret <32 x i8> %shuffle
4496}
4497
4498; PR33740
; Single-input shuffle (all mask indices are below 32, %b is unused) that
; interleaves consecutive byte pairs from the low and high 128-bit halves of %a:
; bytes 0-1, 16-17, 2-3, 18-19, ... i.e. a 16-bit-element interleave across
; lanes. Expected lowerings per the checks: vpunpck{l,h}wd on AVX1/XOPAVX1,
; vpermq + vpshufb on AVX2/XOPAVX2, and a single vpermw on AVX512VL.
4499define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31(<32 x i8> %a, <32 x i8> %b) {
4500; AVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4501; AVX1:       # %bb.0:
4502; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4503; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4504; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4505; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4506; AVX1-NEXT:    retq
4507;
4508; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4509; AVX2:       # %bb.0:
4510; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4511; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
4512; AVX2-NEXT:    retq
4513;
4514; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4515; AVX512VL:       # %bb.0:
4516; AVX512VL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
4517; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
4518; AVX512VL-NEXT:    retq
4519;
4520; XOPAVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4521; XOPAVX1:       # %bb.0:
4522; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4523; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4524; XOPAVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4525; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4526; XOPAVX1-NEXT:    retq
4527;
4528; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4529; XOPAVX2:       # %bb.0:
4530; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4531; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
4532; XOPAVX2-NEXT:    retq
4533  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23, i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
4534  ret <32 x i8> %shuffle
4535}
4536
; The low 128-bit lane is entirely poison; the high lane is a splat of byte 10
; of %a (a low-lane byte broadcast into the high lane). %b is unused. Every
; prefix expects an xmm vpshufb splat followed by a 128-bit insert into the
; upper half (vinsertf128 on AVX1/XOPAVX1, vinserti128 otherwise).
4537define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10(<32 x i8> %a, <32 x i8> %b) {
4538; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4539; AVX1:       # %bb.0:
4540; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4541; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4542; AVX1-NEXT:    retq
4543;
4544; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4545; AVX2OR512VL:       # %bb.0:
4546; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4547; AVX2OR512VL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
4548; AVX2OR512VL-NEXT:    retq
4549;
4550; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4551; XOPAVX1:       # %bb.0:
4552; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4553; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4554; XOPAVX1-NEXT:    retq
4555;
4556; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4557; XOPAVX2:       # %bb.0:
4558; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4559; XOPAVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
4560; XOPAVX2-NEXT:    retq
4561  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
4562  ret <32 x i8> %shuffle
4563}
4564
; Splat of byte 16 of %a (the first byte of its high 128-bit lane) across all 32
; result bytes; %b is unused. Expected lowerings per the checks: extract the
; high half, then vpshufb with a zero mask + vinsertf128 on AVX1/XOPAVX1, or
; vpbroadcastb on AVX2/AVX512VLBW/XOPAVX2 and the VBMI SLOW / FAST-PERLANE
; configurations. Only AVX512VLVBMI-FAST-ALL is expected to use a single vpermb
; with a broadcast index vector.
4565define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
4566; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4567; AVX1:       # %bb.0:
4568; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4569; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4570; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
4571; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4572; AVX1-NEXT:    retq
4573;
4574; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4575; AVX2:       # %bb.0:
4576; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
4577; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
4578; AVX2-NEXT:    retq
4579;
4580; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4581; AVX512VLBW:       # %bb.0:
4582; AVX512VLBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
4583; AVX512VLBW-NEXT:    vpbroadcastb %xmm0, %ymm0
4584; AVX512VLBW-NEXT:    retq
4585;
4586; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4587; AVX512VLVBMI-SLOW:       # %bb.0:
4588; AVX512VLVBMI-SLOW-NEXT:    vextracti128 $1, %ymm0, %xmm0
4589; AVX512VLVBMI-SLOW-NEXT:    vpbroadcastb %xmm0, %ymm0
4590; AVX512VLVBMI-SLOW-NEXT:    retq
4591;
4592; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4593; AVX512VLVBMI-FAST-ALL:       # %bb.0:
4594; AVX512VLVBMI-FAST-ALL-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
4595; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
4596; AVX512VLVBMI-FAST-ALL-NEXT:    retq
4597;
4598; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4599; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
4600; AVX512VLVBMI-FAST-PERLANE-NEXT:    vextracti128 $1, %ymm0, %xmm0
4601; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpbroadcastb %xmm0, %ymm0
4602; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
4603;
4604; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4605; XOPAVX1:       # %bb.0:
4606; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4607; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
4608; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
4609; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4610; XOPAVX1-NEXT:    retq
4611;
4612; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4613; XOPAVX2:       # %bb.0:
4614; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
4615; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
4616; XOPAVX2-NEXT:    retq
4617  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
4618  ret <32 x i8> %shuffle
4619}
4620
; Only the low 128-bit lane is defined: eight copies of byte 15 of %a followed
; by eight copies of byte 0 of %b (mask index 32); the high lane is poison, so
; every expected lowering works purely on xmm registers. Expected per the
; checks: a shuffle/unpack sequence on AVX1, vpbroadcastb + vpshufb +
; vpunpcklqdq on AVX2/AVX512VLBW, vpermt2b on AVX512VLVBMI, and a single
; two-source vpperm on XOP.
4621define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4622; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4623; AVX1:       # %bb.0:
4624; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4625; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
4626; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
4627; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4628; AVX1-NEXT:    retq
4629;
4630; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4631; AVX2:       # %bb.0:
4632; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
4633; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4634; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4635; AVX2-NEXT:    retq
4636;
4637; AVX512VLBW-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4638; AVX512VLBW:       # %bb.0:
4639; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %xmm1
4640; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4641; AVX512VLBW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4642; AVX512VLBW-NEXT:    retq
4643;
4644; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4645; AVX512VLVBMI:       # %bb.0:
4646; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
4647; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
4648; AVX512VLVBMI-NEXT:    retq
4649;
4650; XOP-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4651; XOP:       # %bb.0:
4652; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15],xmm1[0,0,0,0,0,0,0,0]
4653; XOP-NEXT:    retq
4658  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
4659  ret <32 x i8> %shuffle
4660}
4661
; Splat byte 15 of %a across result bytes 0-15. Mask entries 16-31 are poison,
; so the upper half of the result is unconstrained and %b is never selected.
; The checks below expect a single 128-bit vpshufb on xmm0.
4662define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4663; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4664; ALL:       # %bb.0:
4665; ALL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4666; ALL-NEXT:    retq
4667  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
4668  ret <32 x i8> %shuffle
4669}
4670
; Splat byte 22 of %a (byte 6 of its upper 128-bit half) across result bytes
; 0-15. Mask entries 16-31 are poison and %b is never selected.
; The checks below expect the upper half to be extracted first and then
; splatted with a 128-bit vpshufb using index 6.
4671define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4672; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4673; AVX1:       # %bb.0:
4674; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4675; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4676; AVX1-NEXT:    retq
4677;
4678; AVX2OR512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4679; AVX2OR512VL:       # %bb.0:
4680; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
4681; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4682; AVX2OR512VL-NEXT:    retq
4683;
4684; XOPAVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4685; XOPAVX1:       # %bb.0:
4686; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4687; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4688; XOPAVX1-NEXT:    retq
4689;
4690; XOPAVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4691; XOPAVX2:       # %bb.0:
4692; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
4693; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4694; XOPAVX2-NEXT:    retq
4695  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
4696  ret <32 x i8> %shuffle
4697}
4698
4699; PR36933
; Result byte 0 is the last byte of %a0 (index 31); result bytes 1-31 are
; bytes 0-30 of %a1 (indices 32-62). In other words, a 32-byte window read at
; byte offset 31 of the concatenation %a0:%a1, which crosses both 128-bit lanes.
; The 256-bit checks expect a lane permute followed by vpalignr; the VBMI
; checks expect one two-source byte permute (vpermi2b).
4700define <32 x i8> @shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a0, <32 x i8> %a1) {
4701; AVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4702; AVX1:       # %bb.0:
4703; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4704; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4705; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4706; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4707; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4708; AVX1-NEXT:    retq
4709;
4710; AVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4711; AVX2:       # %bb.0:
4712; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4713; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4714; AVX2-NEXT:    retq
4715;
4716; AVX512VLBW-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4717; AVX512VLBW:       # %bb.0:
4718; AVX512VLBW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4719; AVX512VLBW-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4720; AVX512VLBW-NEXT:    retq
4721;
4722; AVX512VLVBMI-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4723; AVX512VLVBMI:       # %bb.0:
4724; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4725; AVX512VLVBMI-NEXT:    vpermi2b %ymm0, %ymm1, %ymm2
4726; AVX512VLVBMI-NEXT:    vmovdqa %ymm2, %ymm0
4727; AVX512VLVBMI-NEXT:    retq
4728;
4729; XOPAVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4730; XOPAVX1:       # %bb.0:
4731; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4732; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4733; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4734; XOPAVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4735; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4736; XOPAVX1-NEXT:    retq
4737;
4738; XOPAVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4739; XOPAVX2:       # %bb.0:
4740; XOPAVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4741; XOPAVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4742; XOPAVX2-NEXT:    retq
4743  %shuffle = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
4744  ret <32 x i8> %shuffle
4745}
4746
; Single-input shuffle: within every aligned 4-byte group the last byte moves
; to the front and the other three bytes each move up one position
; (3,0,1,2 / 7,4,5,6 / ...).
; The AVX-512 and XOP-with-AVX1 checks expect this to be matched as a rotate of
; each 32-bit element by 8 bits (vprold / vprotd); the remaining
; configurations are expected to use vpshufb.
4747define <32 x i8> @shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i8> %a) {
4748; AVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4749; AVX1:       # %bb.0:
4750; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4751; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
4752; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
4753; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
4754; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4755; AVX1-NEXT:    retq
4756;
4757; AVX2-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4758; AVX2:       # %bb.0:
4759; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30]
4760; AVX2-NEXT:    retq
4761;
4762; AVX512VL-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4763; AVX512VL:       # %bb.0:
4764; AVX512VL-NEXT:    vprold $8, %ymm0, %ymm0
4765; AVX512VL-NEXT:    retq
4766;
4767; XOPAVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4768; XOPAVX1:       # %bb.0:
4769; XOPAVX1-NEXT:    vprotd $8, %xmm0, %xmm1
4770; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4771; XOPAVX1-NEXT:    vprotd $8, %xmm0, %xmm0
4772; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4773; XOPAVX1-NEXT:    retq
4774;
4775; XOPAVX2-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4776; XOPAVX2:       # %bb.0:
4777; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30]
4778; XOPAVX2-NEXT:    retq
4779  %shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30>
4780  ret <32 x i8> %shuffle
4781}
4782
4783; PR44379
; Single-input shuffle: within every aligned 8-byte group, bytes 2-7 move to
; the front and bytes 0-1 move to the end (2,3,4,5,6,7,0,1 / ...).
; The AVX-512 and XOP-with-AVX1 checks expect a rotate of each 64-bit element
; by 48 bits (vprolq / vprotq); other configurations are expected to use
; word shuffles (vpshuflw + vpshufhw) or a single vpshufb.
4784define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25(<32 x i8> %a) {
4785; AVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4786; AVX1:       # %bb.0:
4787; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,2,3,0,4,5,6,7]
4788; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,4]
4789; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4790; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
4791; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
4792; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4793; AVX1-NEXT:    retq
4794;
4795; AVX2-SLOW-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4796; AVX2-SLOW:       # %bb.0:
4797; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15]
4798; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12]
4799; AVX2-SLOW-NEXT:    retq
4800;
4801; AVX2-FAST-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4802; AVX2-FAST:       # %bb.0:
4803; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,18,19,20,21,22,23,16,17,26,27,28,29,30,31,24,25]
4804; AVX2-FAST-NEXT:    retq
4805;
4806; AVX512VL-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4807; AVX512VL:       # %bb.0:
4808; AVX512VL-NEXT:    vprolq $48, %ymm0, %ymm0
4809; AVX512VL-NEXT:    retq
4810;
4811; XOPAVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4812; XOPAVX1:       # %bb.0:
4813; XOPAVX1-NEXT:    vprotq $48, %xmm0, %xmm1
4814; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4815; XOPAVX1-NEXT:    vprotq $48, %xmm0, %xmm0
4816; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4817; XOPAVX1-NEXT:    retq
4818;
4819; XOPAVX2-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4820; XOPAVX2:       # %bb.0:
4821; XOPAVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15]
4822; XOPAVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12]
4823; XOPAVX2-NEXT:    retq
4824  %shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25>
4825  ret <32 x i8> %shuffle
4826}
4827
4828; PR47194
; Single-input shuffle that repeats the byte pair 30,31 sixteen times, i.e. it
; broadcasts the last 16-bit element of %a to every word of the result. The
; source element sits in the upper 128-bit lane, so the broadcast crosses lanes.
; The AVX-512 checks expect a word permute (vpermw) with a splatted index of 15;
; the expected AVX2 sequence differs by shuffle-speed tuning flags.
4829define <32 x i8> @shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31(<32 x i8> %a) {
4830; AVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4831; AVX1:       # %bb.0:
4832; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4833; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
4834; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
4835; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4836; AVX1-NEXT:    retq
4837;
4838; AVX2-SLOW-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4839; AVX2-SLOW:       # %bb.0:
4840; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4841; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
4842; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4843; AVX2-SLOW-NEXT:    retq
4844;
4845; AVX2-FAST-ALL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4846; AVX2-FAST-ALL:       # %bb.0:
4847; AVX2-FAST-ALL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4848; AVX2-FAST-ALL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
4849; AVX2-FAST-ALL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
4850; AVX2-FAST-ALL-NEXT:    retq
4851;
4852; AVX2-FAST-PERLANE-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4853; AVX2-FAST-PERLANE:       # %bb.0:
4854; AVX2-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
4855; AVX2-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4856; AVX2-FAST-PERLANE-NEXT:    retq
4857;
4858; AVX512VL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4859; AVX512VL:       # %bb.0:
4860; AVX512VL-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4861; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
4862; AVX512VL-NEXT:    retq
4863;
4864; XOPAVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4865; XOPAVX1:       # %bb.0:
4866; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
4867; XOPAVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
4868; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
4869; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
4870; XOPAVX1-NEXT:    retq
4871;
4872; XOPAVX2-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4873; XOPAVX2:       # %bb.0:
4874; XOPAVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4875; XOPAVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
4876; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4877; XOPAVX2-NEXT:    retq
4878  %shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31>
4879  ret <32 x i8> %shuffle
4880}
4881
; Two-input shuffle that stays within 128-bit lanes: in each lane the low 8
; result bytes are the even-indexed bytes of that lane of %a0 and the high 8
; result bytes are the even-indexed bytes of the same lane of %a1.
; The AVX1 checks expect each word to be masked to its low byte and packed
; with vpackuswb; the VBMI checks expect one two-source byte permute.
4882define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
4883; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4884; AVX1:       # %bb.0:
4885; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4886; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4887; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
4888; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
4889; AVX1-NEXT:    vpand %xmm3, %xmm4, %xmm4
4890; AVX1-NEXT:    vpackuswb %xmm2, %xmm4, %xmm2
4891; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
4892; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
4893; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
4894; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4895; AVX1-NEXT:    retq
4896;
4897; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4898; AVX2:       # %bb.0:
4899; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
4900; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
4901; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
4902; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4903; AVX2-NEXT:    retq
4904;
4905; AVX512VLBW-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4906; AVX512VLBW:       # %bb.0:
4907; AVX512VLBW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
4908; AVX512VLBW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
4909; AVX512VLBW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
4910; AVX512VLBW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4911; AVX512VLBW-NEXT:    retq
4912;
4913; AVX512VLVBMI-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4914; AVX512VLVBMI:       # %bb.0:
4915; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
4916; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
4917; AVX512VLVBMI-NEXT:    retq
4918;
4919; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4920; XOPAVX1:       # %bb.0:
4921; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4922; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
4923; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4924; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm3, %xmm2
4925; XOPAVX1-NEXT:    vpperm %xmm4, %xmm1, %xmm0, %xmm0
4926; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4927; XOPAVX1-NEXT:    retq
4928;
4929; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4930; XOPAVX2:       # %bb.0:
4931; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
4932; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
4933; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
4934; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4935; XOPAVX2-NEXT:    retq
4936  %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
4937  ret <32 x i8> %1
4938}
4939
; Two-input shuffle selecting every even-indexed byte of the concatenation
; %a0:%a1. Result bytes 0-15 come from %a0 and bytes 16-31 from %a1, so unlike
; the lane-local variant of this mask the selection crosses 128-bit lanes.
; The AVX-512 checks expect the inputs to be joined into one zmm register and
; truncated with vpmovwb; the AVX2 checks expect a trailing vpermq to put the
; quadwords back in order.
4940define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
4941; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4942; AVX1:       # %bb.0:
4943; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4944; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4945; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
4946; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
4947; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
4948; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4949; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
4950; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
4951; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
4952; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4953; AVX1-NEXT:    retq
4954;
4955; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4956; AVX2:       # %bb.0:
4957; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
4958; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
4959; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
4960; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4961; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4962; AVX2-NEXT:    retq
4963;
4964; AVX512VL-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4965; AVX512VL:       # %bb.0:
4966; AVX512VL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4967; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4968; AVX512VL-NEXT:    vpmovwb %zmm0, %ymm0
4969; AVX512VL-NEXT:    retq
4970;
4971; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4972; XOPAVX1:       # %bb.0:
4973; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4974; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4975; XOPAVX1-NEXT:    vpperm %xmm3, %xmm2, %xmm1, %xmm1
4976; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4977; XOPAVX1-NEXT:    vpperm %xmm3, %xmm2, %xmm0, %xmm0
4978; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4979; XOPAVX1-NEXT:    retq
4980;
4981; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4982; XOPAVX2:       # %bb.0:
4983; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
4984; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
4985; XOPAVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
4986; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4987; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4988; XOPAVX2-NEXT:    retq
4989  %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
4990  ret <32 x i8> %1
4991}
4992
; Same even-byte selection as the preceding mask pattern, but each <16 x i16>
; input is first logically shifted right by 8 and bitcast to bytes. The net
; effect is to collect the original high byte of every 16-bit element: %a0's
; sixteen high bytes first, then %a1's.
; The checks expect the shift to be kept and followed by a pack/truncate on
; AVX targets, while the XOP-with-AVX1 checks expect the shift to be folded
; into a vpperm that picks the odd-indexed bytes directly.
4993define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<16 x i16> %a0, <16 x i16> %a1) {
4994; AVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4995; AVX1:       # %bb.0:
4996; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
4997; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
4998; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
4999; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
5000; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
5001; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
5002; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
5003; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
5004; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
5005; AVX1-NEXT:    retq
5006;
5007; AVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
5008; AVX2:       # %bb.0:
5009; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
5010; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
5011; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
5012; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5013; AVX2-NEXT:    retq
5014;
5015; AVX512VL-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
5016; AVX512VL:       # %bb.0:
5017; AVX512VL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
5018; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
5019; AVX512VL-NEXT:    vpsrlw $8, %zmm0, %zmm0
5020; AVX512VL-NEXT:    vpmovwb %zmm0, %ymm0
5021; AVX512VL-NEXT:    retq
5022;
5023; XOPAVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
5024; XOPAVX1:       # %bb.0:
5025; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
5026; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
5027; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
5028; XOPAVX1-NEXT:    vpperm %xmm4, %xmm3, %xmm1, %xmm1
5029; XOPAVX1-NEXT:    vpperm %xmm4, %xmm2, %xmm0, %xmm0
5030; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
5031; XOPAVX1-NEXT:    retq
5032;
5033; XOPAVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
5034; XOPAVX2:       # %bb.0:
5035; XOPAVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
5036; XOPAVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
5037; XOPAVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
5038; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5039; XOPAVX2-NEXT:    retq
5040  %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
5041  %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
5042  %3 = bitcast <16 x i16> %1 to <32 x i8>
5043  %4 = bitcast <16 x i16> %2 to <32 x i8>
5044  %5 = shufflevector <32 x i8> %3, <32 x i8> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
5045  ret <32 x i8> %5
5046}
5047
; Two-step shuffle: first interleave the low 8 bytes of each 128-bit lane of
; %a0 and %a1 (a lane-wise byte unpack-low), then reinterpret the result as
; <4 x i64> and swap the two middle quadwords.
; The 256-bit integer checks expect exactly that pair of operations
; (vpunpcklbw followed by vpermq); the 128-bit-only configurations are
; expected to assemble the result one half at a time.
5048define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) {
5049; AVX1-LABEL: PR28136:
5050; AVX1:       # %bb.0:
5051; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
5052; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5053; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero,xmm1[9],zero,xmm1[11],zero,xmm1[13],zero,xmm1[15]
5054; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
5055; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
5056; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero,xmm0[9],zero,xmm0[11],zero,xmm0[13],zero,xmm0[15],zero
5057; AVX1-NEXT:    vpor %xmm2, %xmm3, %xmm2
5058; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[1],zero,xmm1[3],zero,xmm1[5],zero,xmm1[7]
5059; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[1],zero,xmm0[3],zero,xmm0[5],zero,xmm0[7],zero
5060; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
5061; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
5062; AVX1-NEXT:    retq
5063;
5064; AVX2OR512VL-LABEL: PR28136:
5065; AVX2OR512VL:       # %bb.0:
5066; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
5067; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5068; AVX2OR512VL-NEXT:    retq
5069;
5070; XOPAVX1-LABEL: PR28136:
5071; XOPAVX1:       # %bb.0:
5072; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
5073; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5074; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
5075; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
5076; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[10],xmm1[10],xmm0[12],xmm1[12],xmm0[14],xmm1[14],xmm0[9],xmm1[9],xmm0[11],xmm1[11],xmm0[13],xmm1[13],xmm0[15],xmm1[15]
5077; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2],xmm1[2],xmm0[4],xmm1[4],xmm0[6],xmm1[6],xmm0[1],xmm1[1],xmm0[3],xmm1[3],xmm0[5],xmm1[5],xmm0[7],xmm1[7]
5078; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
5079; XOPAVX1-NEXT:    retq
5080;
5081; XOPAVX2-LABEL: PR28136:
5082; XOPAVX2:       # %bb.0:
5083; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
5084; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5085; XOPAVX2-NEXT:    retq
5086  %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
5087  %2 = bitcast <32 x i8> %1 to <4 x i64>
5088  %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
5089  ret <4 x i64> %3
5090}
5091
; Two-step shuffle: first swap the two middle quadwords of %a0, then, viewing
; the result as bytes, gather every fourth byte within each 128-bit lane
; (0,4,8,12,1,5,9,13,...), which transposes each lane as a 4x4 byte matrix.
; The 256-bit checks expect vpermq followed by an in-lane vpshufb; the VBMI
; checks expect both steps to be merged into a single vpermb.
5092define <32 x i8> @PR47262(<4 x i64> %a0) {
5093; AVX1-LABEL: PR47262:
5094; AVX1:       # %bb.0:
5095; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
5096; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5097; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5098; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
5099; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5100; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
5101; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
5102; AVX1-NEXT:    retq
5103;
5104; AVX2-LABEL: PR47262:
5105; AVX2:       # %bb.0:
5106; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5107; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5108; AVX2-NEXT:    retq
5109;
5110; AVX512VLBW-LABEL: PR47262:
5111; AVX512VLBW:       # %bb.0:
5112; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5113; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5114; AVX512VLBW-NEXT:    retq
5115;
5116; AVX512VLVBMI-LABEL: PR47262:
5117; AVX512VLVBMI:       # %bb.0:
5118; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,16,20,1,5,17,21,2,6,18,22,3,7,19,23,8,12,24,28,9,13,25,29,10,14,26,30,11,15,27,31]
5119; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
5120; AVX512VLVBMI-NEXT:    retq
5121;
5122; XOPAVX1-LABEL: PR47262:
5123; XOPAVX1:       # %bb.0:
5124; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
5125; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm2 = xmm0[8,12],xmm1[8,12],xmm0[9,13],xmm1[9,13],xmm0[10,14],xmm1[10,14],xmm0[11,15],xmm1[11,15]
5126; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,4],xmm1[0,4],xmm0[1,5],xmm1[1,5],xmm0[2,6],xmm1[2,6],xmm0[3,7],xmm1[3,7]
5127; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
5128; XOPAVX1-NEXT:    retq
5129;
5130; XOPAVX2-LABEL: PR47262:
5131; XOPAVX2:       # %bb.0:
5132; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5133; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5134; XOPAVX2-NEXT:    retq
5135  %t1 = shufflevector <4 x i64> %a0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
5136  %t2 = bitcast <4 x i64> %t1 to <32 x i8>
5137  %t3 = shufflevector <32 x i8> %t2, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 16, i32 20, i32 24, i32 28, i32 17, i32 21, i32 25, i32 29, i32 18, i32 22, i32 26, i32 30, i32 19, i32 23, i32 27, i32 31>
5138  ret <32 x i8> %t3
5139}
5140
; Single-input byte shuffle of %a0: the mask picks every fourth byte
; (indices 0,4,8,...,28) and repeats that 8-byte group four times to fill
; the 32-byte result.
; Presumably a regression test for the bug report named in the function;
; the report itself is not visible here.
; Expected lowering per the assertions below: targets with only AVX1 (with or
; without XOP) work on 128-bit halves and rejoin them with vinsertf128; AVX2
; and AVX512BW targets use an in-lane vpshufb followed by a vpermd; targets
; with AVX512VBMI use a single vpermb with a broadcast index vector.
5141define <32 x i8> @PR55066(<32 x i8> %a0) {
5142; AVX1-LABEL: PR55066:
5143; AVX1:       # %bb.0:
5144; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
5145; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
5146; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
5147; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
5148; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5149; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5150; AVX1-NEXT:    retq
5151;
5152; AVX2-LABEL: PR55066:
5153; AVX2:       # %bb.0:
5154; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5155; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5156; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
5157; AVX2-NEXT:    retq
5158;
5159; AVX512VLBW-LABEL: PR55066:
5160; AVX512VLBW:       # %bb.0:
5161; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5162; AVX512VLBW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5163; AVX512VLBW-NEXT:    vpermd %ymm0, %ymm1, %ymm0
5164; AVX512VLBW-NEXT:    retq
5165;
5166; AVX512VLVBMI-LABEL: PR55066:
5167; AVX512VLVBMI:       # %bb.0:
5168; AVX512VLVBMI-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28]
5169; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
5170; AVX512VLVBMI-NEXT:    retq
5171;
5172; XOPAVX1-LABEL: PR55066:
5173; XOPAVX1:       # %bb.0:
5174; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
5175; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,4,8,12],xmm1[0,4,8,12],xmm0[0,4,8,12],xmm1[0,4,8,12]
5176; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5177; XOPAVX1-NEXT:    retq
5178;
5179; XOPAVX2-LABEL: PR55066:
5180; XOPAVX2:       # %bb.0:
5181; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5182; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5183; XOPAVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
5184; XOPAVX2-NEXT:    retq
5185  %shuffle = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
5186  ret <32 x i8> %shuffle
5187}
5188
; Loads an i32 from %b, places it in dword lanes 0 and 4 of an <8 x i32>
; (all other lanes poison), then shuffles the bytes so that bytes 0 and 1 of
; lane 0 and bytes 18 and 19 (bytes 2 and 3 of lane 4) are each repeated
; eight times. Each i64 element of the result is therefore one byte of the
; loaded value splatted across eight bytes.
; Presumably a regression test for the bug report named in the function;
; the report itself is not visible here.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets
; broadcast the dword straight from memory and apply one vpshufb; targets
; with only AVX1 build the two 128-bit halves separately.
5189define <4 x i64> @PR66150(ptr %b) {
5190; AVX1-LABEL: PR66150:
5191; AVX1:       # %bb.0:
5192; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5193; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
5194; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
5195; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
5196; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
5197; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
5198; AVX1-NEXT:    retq
5199;
5200; AVX2OR512VL-LABEL: PR66150:
5201; AVX2OR512VL:       # %bb.0:
5202; AVX2OR512VL-NEXT:    vpbroadcastd (%rdi), %ymm0
5203; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
5204; AVX2OR512VL-NEXT:    retq
5205;
5206; XOPAVX1-LABEL: PR66150:
5207; XOPAVX1:       # %bb.0:
5208; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5209; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
5210; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
5211; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
5212; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
5213; XOPAVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
5214; XOPAVX1-NEXT:    retq
5215;
5216; XOPAVX2-LABEL: PR66150:
5217; XOPAVX2:       # %bb.0:
5218; XOPAVX2-NEXT:    vpbroadcastd (%rdi), %ymm0
5219; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
5220; XOPAVX2-NEXT:    retq
5221  %tmp1 = load i32, ptr %b, align 4
5222  %tmp2 = insertelement <8 x i32> poison, i32 %tmp1, i64 0
5223  %tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>
5224  %tmp4 = bitcast <8 x i32> %tmp3 to <32 x i8>
5225  %tmp5 = shufflevector <32 x i8> %tmp4, <32 x i8> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19>
5226  %tmp6 = bitcast <32 x i8> %tmp5 to <4 x i64>
5227  ret <4 x i64> %tmp6
5228}
5229
; Widening two-input shuffle: interleaves the 32 bytes of %a0 with the 32
; bytes of %a1 (mask 0,32,1,33,...,31,63) to produce a <64 x i8> result.
; Presumably a regression test for the bug report named in the function;
; the report itself is not visible here.
; Expected lowering per the assertions below: without 512-bit byte permutes
; the result is assembled from vpunpcklbw/vpunpckhbw pairs plus 128-bit lane
; inserts or permutes; targets with AVX512VBMI use one vpermt2b on zmm
; registers with a constant index vector.
5230define <64 x i8> @PR103564(<32 x i8> %a0, <32 x i8> %a1) {
5231; AVX1-LABEL: PR103564:
5232; AVX1:       # %bb.0:
5233; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5234; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5235; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
5236; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
5237; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
5238; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5239; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5240; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm1
5241; AVX1-NEXT:    vmovaps %ymm2, %ymm0
5242; AVX1-NEXT:    retq
5243;
5244; AVX2-LABEL: PR103564:
5245; AVX2:       # %bb.0:
5246; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
5247; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
5248; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm2[0,1]
5249; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm2[2,3]
5250; AVX2-NEXT:    retq
5251;
5252; AVX512VLBW-LABEL: PR103564:
5253; AVX512VLBW:       # %bb.0:
5254; AVX512VLBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
5255; AVX512VLBW-NEXT:    vextracti128 $1, %ymm0, %xmm3
5256; AVX512VLBW-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
5257; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5258; AVX512VLBW-NEXT:    vinserti128 $1, %xmm4, %ymm2, %ymm2
5259; AVX512VLBW-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5260; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5261; AVX512VLBW-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
5262; AVX512VLBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
5263; AVX512VLBW-NEXT:    retq
5264;
5265; AVX512VLVBMI-LABEL: PR103564:
5266; AVX512VLVBMI:       # %bb.0:
5267; AVX512VLVBMI-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
5268; AVX512VLVBMI-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
5269; AVX512VLVBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
5270; AVX512VLVBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
5271; AVX512VLVBMI-NEXT:    retq
5272;
5273; XOPAVX1-LABEL: PR103564:
5274; XOPAVX1:       # %bb.0:
5275; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5276; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5277; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
5278; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
5279; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
5280; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5281; XOPAVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5282; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm1
5283; XOPAVX1-NEXT:    vmovaps %ymm2, %ymm0
5284; XOPAVX1-NEXT:    retq
5285;
5286; XOPAVX2-LABEL: PR103564:
5287; XOPAVX2:       # %bb.0:
5288; XOPAVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
5289; XOPAVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
5290; XOPAVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm2[0,1]
5291; XOPAVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm2[2,3]
5292; XOPAVX2-NEXT:    retq
5293  %r = shufflevector <32 x i8> %a0, <32 x i8> %a1, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
5294  ret <64 x i8> %r
5295}
5296
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; reinterprets that as <16 x i8>, and splats byte 0 across 32 bytes
; (the shuffle mask is zeroinitializer).
; Expected lowering per the assertions below: AVX2 and AVX512VL targets fold
; everything into one vpbroadcastb from (%rdi); targets with only AVX1 load
; with vmovd, splat with vpshufb using a zero index, and duplicate the
; 128-bit half with vinsertf128.
5297define <32 x i8> @insert_dup_mem_v32i8_i32(ptr %ptr) {
5298; AVX1-LABEL: insert_dup_mem_v32i8_i32:
5299; AVX1:       # %bb.0:
5300; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5301; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5302; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
5303; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5304; AVX1-NEXT:    retq
5305;
5306; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_i32:
5307; AVX2OR512VL:       # %bb.0:
5308; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %ymm0
5309; AVX2OR512VL-NEXT:    retq
5310;
5311; XOPAVX1-LABEL: insert_dup_mem_v32i8_i32:
5312; XOPAVX1:       # %bb.0:
5313; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5314; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5315; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
5316; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5317; XOPAVX1-NEXT:    retq
5318;
5319; XOPAVX2-LABEL: insert_dup_mem_v32i8_i32:
5320; XOPAVX2:       # %bb.0:
5321; XOPAVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
5322; XOPAVX2-NEXT:    retq
5323  %tmp = load i32, ptr %ptr, align 4
5324  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5325  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5326  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> zeroinitializer
5327  ret <32 x i8> %tmp3
5328}
5329
; Loads an i8 from %ptr, sign-extends it to i32, inserts it into element 0
; of a zero <4 x i32>, reinterprets that as <16 x i8>, and splats byte 0
; across 32 bytes. Only byte 0 of the extended value is selected by the
; shuffle mask, so the splatted byte is the loaded byte itself.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets use
; one vpbroadcastb from (%rdi); targets with only AVX1 load with movzbl and
; splat with vpshufb using a zero index.
5330define <32 x i8> @insert_dup_mem_v32i8_sext_i8(ptr %ptr) {
5331; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
5332; AVX1:       # %bb.0:
5333; AVX1-NEXT:    movzbl (%rdi), %eax
5334; AVX1-NEXT:    vmovd %eax, %xmm0
5335; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5336; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
5337; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5338; AVX1-NEXT:    retq
5339;
5340; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_sext_i8:
5341; AVX2OR512VL:       # %bb.0:
5342; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %ymm0
5343; AVX2OR512VL-NEXT:    retq
5344;
5345; XOPAVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
5346; XOPAVX1:       # %bb.0:
5347; XOPAVX1-NEXT:    movzbl (%rdi), %eax
5348; XOPAVX1-NEXT:    vmovd %eax, %xmm0
5349; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5350; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
5351; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5352; XOPAVX1-NEXT:    retq
5353;
5354; XOPAVX2-LABEL: insert_dup_mem_v32i8_sext_i8:
5355; XOPAVX2:       # %bb.0:
5356; XOPAVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
5357; XOPAVX2-NEXT:    retq
5358  %tmp = load i8, ptr %ptr, align 1
5359  %tmp1 = sext i8 %tmp to i32
5360  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
5361  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
5362  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <32 x i32> zeroinitializer
5363  ret <32 x i8> %tmp4
5364}
5365
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; reinterprets that as <16 x i8>, and splats byte 1 across 32 bytes.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets fold
; the byte offset into the load and emit vpbroadcastb from 1(%rdi); targets
; with only AVX1 load the dword with vmovd and splat byte 1 with vpshufb.
5366define <32 x i8> @insert_dup_elt1_mem_v32i8_i32(ptr %ptr) {
5367; AVX1-LABEL: insert_dup_elt1_mem_v32i8_i32:
5368; AVX1:       # %bb.0:
5369; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5370; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5371; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5372; AVX1-NEXT:    retq
5373;
5374; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v32i8_i32:
5375; AVX2OR512VL:       # %bb.0:
5376; AVX2OR512VL-NEXT:    vpbroadcastb 1(%rdi), %ymm0
5377; AVX2OR512VL-NEXT:    retq
5378;
5379; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_i32:
5380; XOPAVX1:       # %bb.0:
5381; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5382; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5383; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5384; XOPAVX1-NEXT:    retq
5385;
5386; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_i32:
5387; XOPAVX2:       # %bb.0:
5388; XOPAVX2-NEXT:    vpbroadcastb 1(%rdi), %ymm0
5389; XOPAVX2-NEXT:    retq
5390  %tmp = load i32, ptr %ptr, align 4
5391  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5392  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5393  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5394  ret <32 x i8> %tmp3
5395}
5396
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; reinterprets that as <16 x i8>, and splats byte 3 (the last byte of the
; loaded dword) across 32 bytes.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets fold
; the byte offset into the load and emit vpbroadcastb from 3(%rdi); targets
; with only AVX1 load the dword with vmovd and splat byte 3 with vpshufb.
5397define <32 x i8> @insert_dup_elt3_mem_v32i8_i32(ptr %ptr) {
5398; AVX1-LABEL: insert_dup_elt3_mem_v32i8_i32:
5399; AVX1:       # %bb.0:
5400; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5401; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
5402; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5403; AVX1-NEXT:    retq
5404;
5405; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v32i8_i32:
5406; AVX2OR512VL:       # %bb.0:
5407; AVX2OR512VL-NEXT:    vpbroadcastb 3(%rdi), %ymm0
5408; AVX2OR512VL-NEXT:    retq
5409;
5410; XOPAVX1-LABEL: insert_dup_elt3_mem_v32i8_i32:
5411; XOPAVX1:       # %bb.0:
5412; XOPAVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5413; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
5414; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5415; XOPAVX1-NEXT:    retq
5416;
5417; XOPAVX2-LABEL: insert_dup_elt3_mem_v32i8_i32:
5418; XOPAVX2:       # %bb.0:
5419; XOPAVX2-NEXT:    vpbroadcastb 3(%rdi), %ymm0
5420; XOPAVX2-NEXT:    retq
5421  %tmp = load i32, ptr %ptr, align 4
5422  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5423  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5424  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
5425  ret <32 x i8> %tmp3
5426}
5427
; Loads an i8 from %ptr, sign-extends it to i32, inserts it into element 0
; of a zero <4 x i32>, reinterprets that as <16 x i8>, and splats byte 1
; across 32 bytes. Byte 1 of the i32 is produced by the sign extension
; rather than read from memory, so the splat cannot be a plain byte
; broadcast from the pointer.
; Expected lowering per the assertions below: every configuration keeps the
; movsbl load. Targets with only AVX1 then splat byte 1 with vpshufb. AVX2
; targets shift the extended value right by 8, move it to a vector register
; and use a register vpbroadcastb. AVX512VL targets shift the same way and
; broadcast directly from the general-purpose register.
5428define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(ptr %ptr) {
5429; AVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5430; AVX1:       # %bb.0:
5431; AVX1-NEXT:    movsbl (%rdi), %eax
5432; AVX1-NEXT:    vmovd %eax, %xmm0
5433; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5434; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5435; AVX1-NEXT:    retq
5436;
5437; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5438; AVX2:       # %bb.0:
5439; AVX2-NEXT:    movsbl (%rdi), %eax
5440; AVX2-NEXT:    shrl $8, %eax
5441; AVX2-NEXT:    vmovd %eax, %xmm0
5442; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
5443; AVX2-NEXT:    retq
5444;
5445; AVX512VL-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5446; AVX512VL:       # %bb.0:
5447; AVX512VL-NEXT:    movsbl (%rdi), %eax
5448; AVX512VL-NEXT:    shrl $8, %eax
5449; AVX512VL-NEXT:    vpbroadcastb %eax, %ymm0
5450; AVX512VL-NEXT:    retq
5451;
5452; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5453; XOPAVX1:       # %bb.0:
5454; XOPAVX1-NEXT:    movsbl (%rdi), %eax
5455; XOPAVX1-NEXT:    vmovd %eax, %xmm0
5456; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5457; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
5458; XOPAVX1-NEXT:    retq
5459;
5460; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5461; XOPAVX2:       # %bb.0:
5462; XOPAVX2-NEXT:    movsbl (%rdi), %eax
5463; XOPAVX2-NEXT:    shrl $8, %eax
5464; XOPAVX2-NEXT:    vmovd %eax, %xmm0
5465; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
5466; XOPAVX2-NEXT:    retq
5467  %tmp = load i8, ptr %ptr, align 1
5468  %tmp1 = sext i8 %tmp to i32
5469  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
5470  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
5471  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5472  ret <32 x i8> %tmp4
5473}
5474
; Two chained shuffles that together form a zero extension.
; The first shuffle moves bytes 0-3 of %a0 to positions 8-11 and bytes 4-7 to
; positions 20-23, leaving every other position poison.
; The second shuffle reads those bytes back in pairs (indices 8,9 / 10,11 /
; 20,21 / 22,23) and follows each pair with six copies of index 32 or 48,
; which select elements 0 and 16 of the second operand; both are the
; constant i8 0.
; Net effect: each of the four low 16-bit words of %a0 is zero-extended to
; 64 bits.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets emit
; a single 256-bit vpmovzxwq; targets with only AVX1 emit two 128-bit
; vpmovzxwq with a vpshufd between them, joined by vinsertf128.
5475define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) {
5476; AVX1-LABEL: zeroable_src_to_zext:
5477; AVX1:       # %bb.0:
5478; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5479; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
5480; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5481; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
5482; AVX1-NEXT:    retq
5483;
5484; AVX2OR512VL-LABEL: zeroable_src_to_zext:
5485; AVX2OR512VL:       # %bb.0:
5486; AVX2OR512VL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
5487; AVX2OR512VL-NEXT:    retq
5488;
5489; XOPAVX1-LABEL: zeroable_src_to_zext:
5490; XOPAVX1:       # %bb.0:
5491; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5492; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
5493; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5494; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
5495; XOPAVX1-NEXT:    retq
5496;
5497; XOPAVX2-LABEL: zeroable_src_to_zext:
5498; XOPAVX2:       # %bb.0:
5499; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
5500; XOPAVX2-NEXT:    retq
5501  %1 = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
5502  %2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
5503  ret <32 x i8> %2
5504}
5505
; Two-input shuffle whose low 16 result bytes interleave bytes 8-15 of %x
; with mask indices 56-63, i.e. bytes 24-31 of %y (the top eight bytes of
; %y's upper 128-bit half). The upper 16 result bytes are poison.
; Expected lowering per the assertions below: every configuration extracts
; the upper 128 bits of %y (vextractf128 with only AVX1, vextracti128
; otherwise) and then emits a single 128-bit vpunpckhbw.
5506define <32 x i8> @unpckh_v32i8(<32 x i8> %x, <32 x i8> %y) {
5507; AVX1-LABEL: unpckh_v32i8:
5508; AVX1:       # %bb.0:
5509; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
5510; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5511; AVX1-NEXT:    retq
5512;
5513; AVX2OR512VL-LABEL: unpckh_v32i8:
5514; AVX2OR512VL:       # %bb.0:
5515; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm1, %xmm1
5516; AVX2OR512VL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5517; AVX2OR512VL-NEXT:    retq
5518;
5519; XOPAVX1-LABEL: unpckh_v32i8:
5520; XOPAVX1:       # %bb.0:
5521; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
5522; XOPAVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5523; XOPAVX1-NEXT:    retq
5524;
5525; XOPAVX2-LABEL: unpckh_v32i8:
5526; XOPAVX2:       # %bb.0:
5527; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
5528; XOPAVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5529; XOPAVX2-NEXT:    retq
5530  %unpckh = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 8, i32 56, i32 9, i32 57, i32 10, i32 58, i32 11, i32 59, i32 12, i32 60, i32 13, i32 61, i32 14, i32 62, i32 15, i32 63, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
5531  ret <32 x i8> %unpckh
5532}
5533
; Shuffles %a against a constant vector whose only defined element is i8 0
; at position 10 (mask index 42 = 32 + 10). The mask emits each consecutive
; byte pair from the low 16 bytes of %a followed by two copies of that zero
; byte, so each of the eight low 16-bit words of %a is zero-extended to
; 32 bits.
; Expected lowering per the assertions below: AVX2 and AVX512VL targets emit
; a single 256-bit vpmovzxwd; targets with only AVX1 build the low half with
; a 128-bit vpmovzxwd and the high half with vpunpckhwd against a zeroed
; register, then join them with vinsertf128.
5534define <32 x i8> @shuffle_v16i16_zextinreg_to_v8i32(<32 x i8> %a)  {
5535; AVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5536; AVX1:       # %bb.0:
5537; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5538; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5539; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5540; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
5541; AVX1-NEXT:    retq
5542;
5543; AVX2OR512VL-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5544; AVX2OR512VL:       # %bb.0:
5545; AVX2OR512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5546; AVX2OR512VL-NEXT:    retq
5547;
5548; XOPAVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5549; XOPAVX1:       # %bb.0:
5550; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5551; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5552; XOPAVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5553; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
5554; XOPAVX1-NEXT:    retq
5555;
5556; XOPAVX2-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5557; XOPAVX2:       # %bb.0:
5558; XOPAVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5559; XOPAVX2-NEXT:    retq
5560  %b = shufflevector <32 x i8> %a, <32 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 0, i32 1, i32 42, i32 42, i32 2, i32 3, i32 42, i32 42, i32 4, i32 5, i32 42, i32 42, i32 6, i32 7, i32 42, i32 42, i32 8, i32 9, i32 42, i32 42, i32 10, i32 11, i32 42, i32 42, i32 12, i32 13, i32 42, i32 42, i32 14, i32 15, i32 42, i32 42>
5561  ret <32 x i8> %b
5562}
5562}
5563