xref: /llvm-project/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll (revision f6ff2cc7e0ae4fd9b14583a998ddeada256a954f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2                                                                 | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle                                | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle                                  | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
6
7; Shuffle lowest element of some subvector into highest element of some subvector.
8; Mainly this is testing how well we avoid subvector extractions/insertions.
9; https://bugs.llvm.org/show_bug.cgi?id=50971
10
; Unary: mask <0,0> (zeroinitializer) - both result elements are %x[0], i.e. element 0 is copied into element 1.
11define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<2 x double> %x) nounwind {
12; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
13; CHECK:       # %bb.0:
14; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
15; CHECK-NEXT:    retq
16  %r = shufflevector <2 x double> %x, <2 x double> poison, <2 x i32> zeroinitializer
17  ret <2 x double> %r
18}
19
; Binary: result element 1 is %y[0] (mask index 2); element 0 stays %x[0].
20define <2 x double> @vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<2 x double> %x, <2 x double> %y) nounwind {
21; CHECK-LABEL: vec128_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
24; CHECK-NEXT:    retq
25  %r = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
26  ret <2 x double> %r
27}
28
; Unary: mask <0,0> (zeroinitializer) - both result elements are %x[0], i.e. element 0 is copied into element 1.
29define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<2 x i64> %x) nounwind {
30; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
31; CHECK:       # %bb.0:
32; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
33; CHECK-NEXT:    retq
34  %r = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> zeroinitializer
35  ret <2 x i64> %r
36}
37
; Binary: result element 1 is %y[0] (mask index 2); element 0 stays %x[0].
38define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<2 x i64> %x, <2 x i64> %y) nounwind {
39; CHECK-LABEL: vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
42; CHECK-NEXT:    retq
43  %r = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
44  ret <2 x i64> %r
45}
46
; Unary: result element 3 is a copy of %x[0]; elements 0-2 keep their positions.
47define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<4 x float> %x) nounwind {
48; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
49; CHECK:       # %bb.0:
50; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
51; CHECK-NEXT:    retq
52  %r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
53  ret <4 x float> %r
54}
55
; Binary: result element 3 is %y[0] (mask index 4); elements 0-2 come from %x unchanged.
56define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<4 x float> %x, <4 x float> %y) nounwind {
57; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
58; CHECK:       # %bb.0:
59; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
60; CHECK-NEXT:    retq
61  %r = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
62  ret <4 x float> %r
63}
64
; Unary: result element 3 is a copy of %x[0]; elements 0-2 keep their positions.
65define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<4 x i32> %x) nounwind {
66; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
67; CHECK:       # %bb.0:
68; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
69; CHECK-NEXT:    retq
70  %r = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
71  ret <4 x i32> %r
72}
73
; Binary: result element 3 is %y[0] (mask index 4); elements 0-2 come from %x unchanged.
74define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<4 x i32> %x, <4 x i32> %y) nounwind {
75; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
76; CHECK:       # %bb.0:
77; CHECK-NEXT:    vbroadcastss %xmm1, %xmm1
78; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
79; CHECK-NEXT:    retq
80  %r = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
81  ret <4 x i32> %r
82}
83
; Unary: result element 7 is a copy of %x[0]; elements 0-6 keep their positions.
84define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<8 x i16> %x) nounwind {
85; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
86; CHECK:       # %bb.0:
87; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1]
88; CHECK-NEXT:    retq
89  %r = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
90  ret <8 x i16> %r
91}
92
; Binary: result element 7 is %y[0] (mask index 8); elements 0-6 come from %x unchanged.
93define <8 x i16> @vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<8 x i16> %x, <8 x i16> %y) nounwind {
94; CHECK-LABEL: vec128_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
95; CHECK:       # %bb.0:
96; CHECK-NEXT:    vpbroadcastw %xmm1, %xmm1
97; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
98; CHECK-NEXT:    retq
99  %r = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
100  ret <8 x i16> %r
101}
102
; Unary: result element 15 is a copy of %x[0]; elements 0-14 keep their positions.
103define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<16 x i8> %x) nounwind {
104; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
105; CHECK:       # %bb.0:
106; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0]
107; CHECK-NEXT:    retq
108  %r = shufflevector <16 x i8> %x, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
109  ret <16 x i8> %r
110}
111
; Binary: result element 15 is %y[0] (mask index 16); elements 0-14 come from %x unchanged.
112define <16 x i8> @vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<16 x i8> %x, <16 x i8> %y) nounwind {
113; CHECK-LABEL: vec128_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
116; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
117; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
118; CHECK-NEXT:    retq
119  %r = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
120  ret <16 x i8> %r
121}
122
; Unary: result element 1 is a copy of %x[0]; elements 0, 2 and 3 keep their positions.
123define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
124; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_unary:
125; CHECK:       # %bb.0:
126; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
127; CHECK-NEXT:    retq
128  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
129  ret <4 x double> %r
130}
131
; Binary: result element 1 is %y[0] (mask index 4); elements 0, 2 and 3 come from %x unchanged.
132define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
133; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_1_binary:
134; CHECK:       # %bb.0:
135; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
136; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
137; CHECK-NEXT:    retq
138  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
139  ret <4 x double> %r
140}
141
; Unary: result element 3 is a copy of %x[0]; elements 0-2 keep their positions.
142define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
143; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_unary:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
146; CHECK-NEXT:    retq
147  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
148  ret <4 x double> %r
149}
150
; Binary: result element 3 is %y[0] (mask index 4); elements 0-2 come from %x unchanged.
151define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
152; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_2_binary:
153; CHECK:       # %bb.0:
154; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
155; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
156; CHECK-NEXT:    retq
157  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
158  ret <4 x double> %r
159}
160
; Unary: result elements 1 and 3 are both copies of %x[0]; elements 0 and 2 keep their positions.
161define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
162; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_unary:
163; CHECK:       # %bb.0:
164; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
165; CHECK-NEXT:    retq
166  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
167  ret <4 x double> %r
168}
169
; Binary: result elements 1 and 3 are both %y[0] (mask index 4); elements 0 and 2 come from %x unchanged.
170define <4 x double> @vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
171; CHECK-LABEL: vec256_eltty_double_source_subvec_0_target_subvec_mask_3_binary:
172; CHECK:       # %bb.0:
173; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm1
174; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
175; CHECK-NEXT:    retq
176  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
177  ret <4 x double> %r
178}
179
; Unary: result element 1 is a copy of %x[2]; elements 0, 2 and 3 keep their positions.
180define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary(<4 x double> %x) nounwind {
181; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_unary:
182; CHECK:       # %bb.0:
183; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
184; CHECK-NEXT:    retq
185  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
186  ret <4 x double> %r
187}
188
; Binary: result element 1 is %y[2] (mask index 6); elements 0, 2 and 3 come from %x unchanged.
189define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary(<4 x double> %x, <4 x double> %y) nounwind {
190; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_1_binary:
191; CHECK:       # %bb.0:
192; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[2,3]
193; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
194; CHECK-NEXT:    retq
195  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
196  ret <4 x double> %r
197}
198
; Unary: result element 3 is a copy of %x[2]; elements 0-2 keep their positions.
199define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary(<4 x double> %x) nounwind {
200; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_unary:
201; CHECK:       # %bb.0:
202; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
203; CHECK-NEXT:    retq
204  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
205  ret <4 x double> %r
206}
207
; Binary: result element 3 is %y[2] (mask index 6); elements 0-2 come from %x unchanged.
208define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary(<4 x double> %x, <4 x double> %y) nounwind {
209; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_2_binary:
210; CHECK:       # %bb.0:
211; CHECK-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
212; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
213; CHECK-NEXT:    retq
214  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
215  ret <4 x double> %r
216}
217
; Unary: result elements 1 and 3 are both copies of %x[2]; elements 0 and 2 keep their positions.
218define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary(<4 x double> %x) nounwind {
219; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_unary:
220; CHECK:       # %bb.0:
221; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
222; CHECK-NEXT:    retq
223  %r = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
224  ret <4 x double> %r
225}
226
; Binary: result elements 1 and 3 are both %y[2] (mask index 6); elements 0 and 2 come from %x unchanged.
227define <4 x double> @vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary(<4 x double> %x, <4 x double> %y) nounwind {
228; CHECK-LABEL: vec256_eltty_double_source_subvec_1_target_subvec_mask_3_binary:
229; CHECK:       # %bb.0:
230; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
231; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
232; CHECK-NEXT:    retq
233  %r = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
234  ret <4 x double> %r
235}
236
; Unary: result element 1 is a copy of %x[0]; elements 0, 2 and 3 keep their positions.
237define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
238; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_unary:
239; CHECK:       # %bb.0:
240; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
241; CHECK-NEXT:    retq
242  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
243  ret <4 x i64> %r
244}
245
; Binary: result element 1 is %y[0] (mask index 4); elements 0, 2 and 3 come from %x unchanged.
246define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
247; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_1_binary:
248; CHECK:       # %bb.0:
249; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
250; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
251; CHECK-NEXT:    retq
252  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
253  ret <4 x i64> %r
254}
255
; Unary: result element 3 is a copy of %x[0]; elements 0-2 keep their positions.
256define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
257; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_unary:
258; CHECK:       # %bb.0:
259; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
260; CHECK-NEXT:    retq
261  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
262  ret <4 x i64> %r
263}
264
; Binary: result element 3 is %y[0] (mask index 4); elements 0-2 come from %x unchanged.
265define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
266; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_2_binary:
267; CHECK:       # %bb.0:
268; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm1
269; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
270; CHECK-NEXT:    retq
271  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
272  ret <4 x i64> %r
273}
274
; Unary: result elements 1 and 3 are both copies of %x[0]; elements 0 and 2 keep their positions.
275define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
276; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary:
277; CHECK:       # %bb.0:
278; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
279; CHECK-NEXT:    retq
280  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
281  ret <4 x i64> %r
282}
283
; Binary: result elements 1 and 3 are both %y[0] (mask index 4); elements 0 and 2 come from %x unchanged.
284define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
285; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
286; CHECK:       # %bb.0:
287; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm1
288; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
289; CHECK-NEXT:    retq
290  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
291  ret <4 x i64> %r
292}
293
; Unary: result element 1 is a copy of %x[2]; elements 0, 2 and 3 keep their positions.
294define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary(<4 x i64> %x) nounwind {
295; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_unary:
296; CHECK:       # %bb.0:
297; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
298; CHECK-NEXT:    retq
299  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
300  ret <4 x i64> %r
301}
302
; Binary: result element 1 is %y[2] (mask index 6); elements 0, 2 and 3 come from %x unchanged.
303define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
304; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_1_binary:
305; CHECK:       # %bb.0:
306; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
307; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
308; CHECK-NEXT:    retq
309  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
310  ret <4 x i64> %r
311}
312
; Unary: result element 3 is a copy of %x[2]; elements 0-2 keep their positions.
313define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary(<4 x i64> %x) nounwind {
314; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary:
315; CHECK:       # %bb.0:
316; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
317; CHECK-NEXT:    retq
318  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
319  ret <4 x i64> %r
320}
321
; Binary: result element 3 is %y[2] (mask index 6); elements 0-2 come from %x unchanged.
322define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
323; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary:
324; CHECK:       # %bb.0:
325; CHECK-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
326; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
327; CHECK-NEXT:    retq
328  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
329  ret <4 x i64> %r
330}
331
; Unary: result elements 1 and 3 are both copies of %x[2]; elements 0 and 2 keep their positions.
332define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary(<4 x i64> %x) nounwind {
333; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_unary:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,2]
336; CHECK-NEXT:    retq
337  %r = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
338  ret <4 x i64> %r
339}
340
; Binary: result elements 1 and 3 are both %y[2] (mask index 6); elements 0 and 2 come from %x unchanged.
341define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
342; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary:
343; CHECK:       # %bb.0:
344; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
345; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
346; CHECK-NEXT:    retq
347  %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
348  ret <4 x i64> %r
349}
350
; Unary: result element 3 is a copy of %x[0]; elements 0-2 and 4-7 keep their positions.
351define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
352; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
353; CHECK:       # %bb.0:
354; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,7]
355; CHECK-NEXT:    retq
356  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
357  ret <8 x float> %r
358}
359
; Binary: result element 3 is %y[0] (mask index 8); elements 0-2 and 4-7 come from %x unchanged.
360define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
361; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_1_binary:
362; CHECK:       # %bb.0:
363; CHECK-NEXT:    vbroadcastss %xmm1, %xmm1
364; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
365; CHECK-NEXT:    retq
366  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
367  ret <8 x float> %r
368}
369
; Unary: result element 7 is a copy of %x[0]; elements 0-6 keep their positions.
370define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
371; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_unary:
372; CHECK:       # %bb.0:
373; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
374; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
375; CHECK-NEXT:    retq
376  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
377  ret <8 x float> %r
378}
379
; Binary: result element 7 is %y[0] (mask index 8); elements 0-6 come from %x unchanged.
380define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
381; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_2_binary:
382; CHECK:       # %bb.0:
383; CHECK-NEXT:    vbroadcastss %xmm1, %ymm1
384; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
385; CHECK-NEXT:    retq
386  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
387  ret <8 x float> %r
388}
389
; Unary: result elements 3 and 7 are both copies of %x[0]; elements 0-2 and 4-6 keep their positions.
390define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
391; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_unary:
392; CHECK:       # %bb.0:
393; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
394; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
395; CHECK-NEXT:    retq
396  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
397  ret <8 x float> %r
398}
399
; Binary: result elements 3 and 7 are both %y[0] (mask index 8); elements 0-2 and 4-6 come from %x unchanged.
400define <8 x float> @vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
401; CHECK-LABEL: vec256_eltty_float_source_subvec_0_target_subvec_mask_3_binary:
402; CHECK:       # %bb.0:
403; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
404; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
405; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
406; CHECK-NEXT:    retq
407  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
408  ret <8 x float> %r
409}
410
; Unary: result element 3 is a copy of %x[4]; elements 0-2 and 4-7 keep their positions.
411define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary(<8 x float> %x) nounwind {
412; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_unary:
413; CHECK:       # %bb.0:
414; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
415; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
416; CHECK-NEXT:    retq
417  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
418  ret <8 x float> %r
419}
420
; Binary: result element 3 is %y[4] (mask index 12); elements 0-2 and 4-7 come from %x unchanged.
421define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary(<8 x float> %x, <8 x float> %y) nounwind {
422; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_1_binary:
423; CHECK:       # %bb.0:
424; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
425; CHECK-NEXT:    vbroadcastss %xmm1, %xmm1
426; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
427; CHECK-NEXT:    retq
428  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
429  ret <8 x float> %r
430}
431
; Unary: result element 7 is a copy of %x[4]; elements 0-6 keep their positions.
432define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary(<8 x float> %x) nounwind {
433; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_unary:
434; CHECK:       # %bb.0:
435; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,4]
436; CHECK-NEXT:    retq
437  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
438  ret <8 x float> %r
439}
440
; Binary: result element 7 is %y[4] (mask index 12); elements 0-6 come from %x unchanged.
; Expected code differs per prefix: CHECK-FAST applies to the RUN lines that enable
; +fast-variable-crosslane-shuffle, CHECK-SLOW to the remaining RUN lines.
441define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary(<8 x float> %x, <8 x float> %y) nounwind {
442; CHECK-SLOW-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
443; CHECK-SLOW:       # %bb.0:
444; CHECK-SLOW-NEXT:    vextractf128 $1, %ymm1, %xmm1
445; CHECK-SLOW-NEXT:    vbroadcastss %xmm1, %ymm1
446; CHECK-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
447; CHECK-SLOW-NEXT:    retq
448;
449; CHECK-FAST-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
450; CHECK-FAST:       # %bb.0:
451; CHECK-FAST-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
452; CHECK-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
453; CHECK-FAST-NEXT:    retq
454  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
455  ret <8 x float> %r
456}
457
; Unary: result elements 3 and 7 are both copies of %x[4]; elements 0-2 and 4-6 keep their positions.
458define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary(<8 x float> %x) nounwind {
459; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_unary:
460; CHECK:       # %bb.0:
461; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
462; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
463; CHECK-NEXT:    retq
464  %r = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
465  ret <8 x float> %r
466}
467
; Binary: result elements 3 and 7 are both %y[4] (mask index 12); elements 0-2 and 4-6 come from %x unchanged.
468define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary(<8 x float> %x, <8 x float> %y) nounwind {
469; CHECK-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_3_binary:
470; CHECK:       # %bb.0:
471; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
472; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
473; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,0],ymm0[4,5],ymm1[6,4]
474; CHECK-NEXT:    retq
475  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
476  ret <8 x float> %r
477}
478
; Unary: result element 3 is a copy of %x[0]; elements 0-2 and 4-7 keep their positions.
479define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
480; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
481; CHECK:       # %bb.0:
482; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,7]
483; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
484; CHECK-NEXT:    retq
485  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
486  ret <8 x i32> %r
487}
488
; Binary: result element 3 is %y[0] (mask index 8); elements 0-2 and 4-7 come from %x unchanged.
489define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
490; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_1_binary:
491; CHECK:       # %bb.0:
492; CHECK-NEXT:    vbroadcastss %xmm1, %xmm1
493; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
494; CHECK-NEXT:    retq
495  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
496  ret <8 x i32> %r
497}
498
; Unary: result element 7 is a copy of %x[0]; elements 0-6 keep their positions.
499define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
500; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_unary:
501; CHECK:       # %bb.0:
502; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,0]
503; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
504; CHECK-NEXT:    retq
505  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
506  ret <8 x i32> %r
507}
508
; Binary: result element 7 is %y[0] (mask index 8); elements 0-6 come from %x unchanged.
509define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
510; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_2_binary:
511; CHECK:       # %bb.0:
512; CHECK-NEXT:    vbroadcastss %xmm1, %ymm1
513; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
514; CHECK-NEXT:    retq
515  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
516  ret <8 x i32> %r
517}
518
; Unary: result elements 3 and 7 are both copies of %x[0]; elements 0-2 and 4-6 keep their positions.
519define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
520; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_unary:
521; CHECK:       # %bb.0:
522; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,0,4,5,6,0]
523; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
524; CHECK-NEXT:    retq
525  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 0>
526  ret <8 x i32> %r
527}
528
; Binary: result elements 3 and 7 are both %y[0] (mask index 8); elements 0-2 and 4-6 come from %x unchanged.
529define <8 x i32> @vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
530; CHECK-LABEL: vec256_eltty_i32_source_subvec_0_target_subvec_mask_3_binary:
531; CHECK:       # %bb.0:
532; CHECK-NEXT:    vbroadcastss %xmm1, %ymm1
533; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
534; CHECK-NEXT:    retq
535  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 8>
536  ret <8 x i32> %r
537}
538
; Unary: result element 3 is a copy of %x[4]; elements 0-2 and 4-7 keep their positions.
539define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary(<8 x i32> %x) nounwind {
540; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_unary:
541; CHECK:       # %bb.0:
542; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,7]
543; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
544; CHECK-NEXT:    retq
545  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
546  ret <8 x i32> %r
547}
548
; Binary: result element 3 is %y[4] (mask index 12); elements 0-2 and 4-7 come from %x unchanged.
549define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
550; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_1_binary:
551; CHECK:       # %bb.0:
552; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
553; CHECK-NEXT:    vbroadcastss %xmm1, %xmm1
554; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7]
555; CHECK-NEXT:    retq
556  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 7>
557  ret <8 x i32> %r
558}
559
; Unary: result element 7 is a copy of %x[4]; elements 0-6 keep their positions.
560define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary(<8 x i32> %x) nounwind {
561; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_unary:
562; CHECK:       # %bb.0:
563; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,4,5,6,4]
564; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
565; CHECK-NEXT:    retq
566  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
567  ret <8 x i32> %r
568}
569
; Binary: result element 7 is %y[4] (mask index 12); elements 0-6 come from %x unchanged.
; Expected code differs per prefix: CHECK-FAST applies to the RUN lines that enable
; +fast-variable-crosslane-shuffle, CHECK-SLOW to the remaining RUN lines.
570define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
571; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
572; CHECK-SLOW:       # %bb.0:
573; CHECK-SLOW-NEXT:    vextractf128 $1, %ymm1, %xmm1
574; CHECK-SLOW-NEXT:    vbroadcastss %xmm1, %ymm1
575; CHECK-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
576; CHECK-SLOW-NEXT:    retq
577;
578; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
579; CHECK-FAST:       # %bb.0:
580; CHECK-FAST-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
581; CHECK-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
582; CHECK-FAST-NEXT:    retq
583  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
584  ret <8 x i32> %r
585}
586
; Unary: result elements 3 and 7 are both copies of %x[4]; elements 0-2 and 4-6 keep their positions.
587define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary(<8 x i32> %x) nounwind {
588; CHECK-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary:
589; CHECK:       # %bb.0:
590; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,4,4,5,6,4]
591; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
592; CHECK-NEXT:    retq
593  %r = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 4>
594  ret <8 x i32> %r
595}
596
; Binary: result elements 3 and 7 are both %y[4] (mask index 12); elements 0-2 and 4-6 come from %x unchanged.
; Expected code differs per prefix: CHECK-FAST applies to the RUN lines that enable
; +fast-variable-crosslane-shuffle, CHECK-SLOW to the remaining RUN lines.
597define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
598; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
599; CHECK-SLOW:       # %bb.0:
600; CHECK-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
601; CHECK-SLOW-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
602; CHECK-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
603; CHECK-SLOW-NEXT:    retq
604;
605; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
606; CHECK-FAST:       # %bb.0:
607; CHECK-FAST-NEXT:    vbroadcastss {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
608; CHECK-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
609; CHECK-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
610; CHECK-FAST-NEXT:    retq
611  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 12, i32 4, i32 5, i32 6, i32 12>
612  ret <8 x i32> %r
613}
614
; Unary <16 x i16> shuffle: the result is %x with element 7 (the highest
; element of the lower 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
615define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
616; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_unary:
617; CHECK:       # %bb.0:
618; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
619; CHECK-NEXT:    retq
620  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
621  ret <16 x i16> %r
622}
623
; Binary <16 x i16> shuffle: the result is %x with element 7 (the highest
; element of the lower 128-bit half) replaced by mask index 16, i.e. element 0
; of %y. All other elements come from %x unchanged.
624define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
625; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_1_binary:
626; CHECK:       # %bb.0:
627; CHECK-NEXT:    vpbroadcastw %xmm1, %xmm1
628; CHECK-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
629; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
630; CHECK-NEXT:    retq
631  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
632  ret <16 x i16> %r
633}
634
; Unary <16 x i16> shuffle: the result is %x with element 15 (the highest
; element of the upper 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
635define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
636; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_unary:
637; CHECK:       # %bb.0:
638; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm1
639; CHECK-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
640; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
641; CHECK-NEXT:    retq
642  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
643  ret <16 x i16> %r
644}
645
; Binary <16 x i16> shuffle: the result is %x with element 15 (the highest
; element of the upper 128-bit half) replaced by mask index 16, i.e. element 0
; of %y. All other elements come from %x unchanged.
646define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
647; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_2_binary:
648; CHECK:       # %bb.0:
649; CHECK-NEXT:    vpbroadcastw %xmm1, %ymm1
650; CHECK-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
651; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
652; CHECK-NEXT:    retq
653  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
654  ret <16 x i16> %r
655}
656
; Unary <16 x i16> shuffle: the result is %x with elements 7 and 15 (the
; highest element of each 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
657define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
658; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_unary:
659; CHECK:       # %bb.0:
660; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm1
661; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
662; CHECK-NEXT:    retq
663  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
664  ret <16 x i16> %r
665}
666
; Binary <16 x i16> shuffle: the result is %x with elements 7 and 15 (the
; highest element of each 128-bit half) replaced by mask index 16, i.e.
; element 0 of %y. All other elements come from %x unchanged.
667define <16 x i16> @vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
668; CHECK-LABEL: vec256_eltty_i16_source_subvec_0_target_subvec_mask_3_binary:
669; CHECK:       # %bb.0:
670; CHECK-NEXT:    vpbroadcastw %xmm1, %ymm1
671; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
672; CHECK-NEXT:    retq
673  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
674  ret <16 x i16> %r
675}
676
; Unary <16 x i16> shuffle: the result is %x with element 7 (the highest
; element of the lower 128-bit half) replaced by element 8 of %x, i.e. the
; lowest element of the upper 128-bit half. All other elements keep their
; position.
677define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary(<16 x i16> %x) nounwind {
678; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_unary:
679; CHECK:       # %bb.0:
680; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
681; CHECK-NEXT:    vpbroadcastw %xmm1, %xmm1
682; CHECK-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
683; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
684; CHECK-NEXT:    retq
685  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
686  ret <16 x i16> %r
687}
688
; Binary <16 x i16> shuffle: the result is %x with element 7 (the highest
; element of the lower 128-bit half) replaced by mask index 24, i.e. element 8
; of %y (the lowest element of %y's upper 128-bit half).
689define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
690; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_1_binary:
691; CHECK:       # %bb.0:
692; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
693; CHECK-NEXT:    vpbroadcastw %xmm1, %xmm1
694; CHECK-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
695; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
696; CHECK-NEXT:    retq
697  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
698  ret <16 x i16> %r
699}
700
; Unary <16 x i16> shuffle: the result is %x with element 15 (the highest
; element of the upper 128-bit half) replaced by element 8 of %x, i.e. the
; lowest element of that same upper half. All other elements keep their
; position.
701define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary(<16 x i16> %x) nounwind {
702; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_unary:
703; CHECK:       # %bb.0:
704; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,16,17]
705; CHECK-NEXT:    retq
706  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
707  ret <16 x i16> %r
708}
709
; Binary <16 x i16> shuffle: the result is %x with element 15 (the highest
; element of the upper 128-bit half) replaced by mask index 24, i.e. element 8
; of %y (the lowest element of %y's upper 128-bit half).
710define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
711; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_2_binary:
712; CHECK:       # %bb.0:
713; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
714; CHECK-NEXT:    vpbroadcastw %xmm1, %ymm1
715; CHECK-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
716; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
717; CHECK-NEXT:    retq
718  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
719  ret <16 x i16> %r
720}
721
; Unary <16 x i16> shuffle: the result is %x with elements 7 and 15 (the
; highest element of each 128-bit half) replaced by element 8 of %x, i.e. the
; lowest element of the upper 128-bit half. All other elements keep their
; position.
722define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary(<16 x i16> %x) nounwind {
723; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_unary:
724; CHECK:       # %bb.0:
725; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
726; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
727; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
728; CHECK-NEXT:    retq
729  %r = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
730  ret <16 x i16> %r
731}
732
; Binary <16 x i16> shuffle: the result is %x with elements 7 and 15 (the
; highest element of each 128-bit half) replaced by mask index 24, i.e.
; element 8 of %y (the lowest element of %y's upper 128-bit half).
733define <16 x i16> @vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary(<16 x i16> %x, <16 x i16> %y) nounwind {
734; CHECK-LABEL: vec256_eltty_i16_source_subvec_1_target_subvec_mask_3_binary:
735; CHECK:       # %bb.0:
736; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
737; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
738; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
739; CHECK-NEXT:    retq
740  %r = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 24, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24>
741  ret <16 x i16> %r
742}
743
; Unary <32 x i8> shuffle: the result is %x with element 15 (the highest
; element of the lower 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
744define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
745; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_unary:
746; CHECK:       # %bb.0:
747; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
748; CHECK-NEXT:    retq
749  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
750  ret <32 x i8> %r
751}
752
; Binary <32 x i8> shuffle: the result is %x with element 15 (the highest
; element of the lower 128-bit half) replaced by mask index 32, i.e. element 0
; of %y. All other elements come from %x unchanged.
753define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
754; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_1_binary:
755; CHECK:       # %bb.0:
756; CHECK-NEXT:    vpbroadcastb %xmm1, %ymm1
757; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
758; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
759; CHECK-NEXT:    retq
760  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
761  ret <32 x i8> %r
762}
763
; Unary <32 x i8> shuffle: the result is %x with element 31 (the highest
; element of the upper 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
764define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
765; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_unary:
766; CHECK:       # %bb.0:
767; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm1
768; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
769; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
770; CHECK-NEXT:    retq
771  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
772  ret <32 x i8> %r
773}
774
; Binary <32 x i8> shuffle: the result is %x with element 31 (the highest
; element of the upper 128-bit half) replaced by mask index 32, i.e. element 0
; of %y. All other elements come from %x unchanged.
775define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
776; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_2_binary:
777; CHECK:       # %bb.0:
778; CHECK-NEXT:    vpbroadcastb %xmm1, %ymm1
779; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
780; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
781; CHECK-NEXT:    retq
782  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
783  ret <32 x i8> %r
784}
785
; Unary <32 x i8> shuffle: the result is %x with elements 15 and 31 (the
; highest element of each 128-bit half) replaced by element 0 of %x. All other
; elements keep their position.
786define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
787; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_unary:
788; CHECK:       # %bb.0:
789; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
790; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
791; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
792; CHECK-NEXT:    # ymm2 = mem[0,1,0,1]
793; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
794; CHECK-NEXT:    retq
795  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 0>
796  ret <32 x i8> %r
797}
798
; Binary <32 x i8> shuffle: the result is %x with elements 15 and 31 (the
; highest element of each 128-bit half) replaced by mask index 32, i.e.
; element 0 of %y. All other elements come from %x unchanged.
799define <32 x i8> @vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
800; CHECK-LABEL: vec256_eltty_i8_source_subvec_0_target_subvec_mask_3_binary:
801; CHECK:       # %bb.0:
802; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,0,1]
803; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
804; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
805; CHECK-NEXT:    # ymm2 = mem[0,1,0,1]
806; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
807; CHECK-NEXT:    retq
808  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 32>
809  ret <32 x i8> %r
810}
811
; Unary <32 x i8> shuffle: the result is %x with element 15 (the highest
; element of the lower 128-bit half) replaced by element 16 of %x, i.e. the
; lowest element of the upper 128-bit half. All other elements keep their
; position.
812define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary(<32 x i8> %x) nounwind {
813; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_unary:
814; CHECK:       # %bb.0:
815; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
816; CHECK-NEXT:    vpbroadcastb %xmm1, %ymm1
817; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
818; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
819; CHECK-NEXT:    retq
820  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
821  ret <32 x i8> %r
822}
823
; Binary <32 x i8> shuffle: the result is %x with element 15 (the highest
; element of the lower 128-bit half) replaced by mask index 48, i.e. element 16
; of %y (the lowest element of %y's upper 128-bit half).
824define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
825; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_1_binary:
826; CHECK:       # %bb.0:
827; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
828; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
829; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
830; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
831; CHECK-NEXT:    retq
832  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
833  ret <32 x i8> %r
834}
835
; Unary <32 x i8> shuffle: the result is %x with element 31 (the highest
; element of the upper 128-bit half) replaced by element 16 of %x, i.e. the
; lowest element of that same upper half. All other elements keep their
; position.
836define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary(<32 x i8> %x) nounwind {
837; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_unary:
838; CHECK:       # %bb.0:
839; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,16]
840; CHECK-NEXT:    retq
841  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
842  ret <32 x i8> %r
843}
844
; Binary <32 x i8> shuffle: the result is %x with element 31 (the highest
; element of the upper 128-bit half) replaced by mask index 48, i.e. element 16
; of %y (the lowest element of %y's upper 128-bit half).
845define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
846; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_2_binary:
847; CHECK:       # %bb.0:
848; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
849; CHECK-NEXT:    vpbroadcastb %xmm1, %ymm1
850; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
851; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
852; CHECK-NEXT:    retq
853  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>
854  ret <32 x i8> %r
855}
856
; Unary <32 x i8> shuffle: the result is %x with elements 15 and 31 (the
; highest element of each 128-bit half) replaced by element 16 of %x, i.e. the
; lowest element of the upper 128-bit half. All other elements keep their
; position.
857define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary(<32 x i8> %x) nounwind {
858; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_unary:
859; CHECK:       # %bb.0:
860; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
861; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
862; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
863; CHECK-NEXT:    # ymm2 = mem[0,1,0,1]
864; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
865; CHECK-NEXT:    retq
866  %r = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
867  ret <32 x i8> %r
868}
869
; Binary <32 x i8> shuffle: the result is %x with elements 15 and 31 (the
; highest element of each 128-bit half) replaced by mask index 48, i.e.
; element 16 of %y (the lowest element of %y's upper 128-bit half).
870define <32 x i8> @vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary(<32 x i8> %x, <32 x i8> %y) nounwind {
871; CHECK-LABEL: vec256_eltty_i8_source_subvec_1_target_subvec_mask_3_binary:
872; CHECK:       # %bb.0:
873; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
874; CHECK-NEXT:    vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
875; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
876; CHECK-NEXT:    # ymm2 = mem[0,1,0,1]
877; CHECK-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
878; CHECK-NEXT:    retq
879  %r = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 48, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 48>
880  ret <32 x i8> %r
881}
882