xref: /llvm-project/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll (revision be6c752e157638849f1f59f7e2b7ecbe11a022fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-SLOW
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-FAST
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-FAST
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-ALL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-PERLANE
9
; Splat test: mask <0,0,0,0> copies element 0 of %a into every lane; %b is unused.
; AVX1 is checked for vmovddup + vinsertf128, AVX2 and AVX512VL for a single vbroadcastsd.
10define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
11; AVX1-LABEL: shuffle_v4f64_0000:
12; AVX1:       # %bb.0:
13; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
14; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
15; AVX1-NEXT:    retq
16;
17; AVX2-LABEL: shuffle_v4f64_0000:
18; AVX2:       # %bb.0:
19; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
20; AVX2-NEXT:    retq
21;
22; AVX512VL-LABEL: shuffle_v4f64_0000:
23; AVX512VL:       # %bb.0:
24; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
25; AVX512VL-NEXT:    retq
26  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
27  ret <4 x double> %shuffle
28}
29
; Unary shuffle of %a with mask <0,0,0,1>; %b is unused.
; AVX1 is checked for vmovddup + vinsertf128, AVX2 and AVX512VL for a single vpermpd.
30define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
31; AVX1-LABEL: shuffle_v4f64_0001:
32; AVX1:       # %bb.0:
33; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
34; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
35; AVX1-NEXT:    retq
36;
37; AVX2-LABEL: shuffle_v4f64_0001:
38; AVX2:       # %bb.0:
39; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
40; AVX2-NEXT:    retq
41;
42; AVX512VL-LABEL: shuffle_v4f64_0001:
43; AVX512VL:       # %bb.0:
44; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
45; AVX512VL-NEXT:    retq
46  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
47  ret <4 x double> %shuffle
48}
49
; Unary shuffle of %a with mask <0,0,2,0>; %b is unused.
; AVX1 is checked for vinsertf128 + vunpcklpd, AVX2 and AVX512VL for a single vpermpd.
50define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
51; AVX1-LABEL: shuffle_v4f64_0020:
52; AVX1:       # %bb.0:
53; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
54; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
55; AVX1-NEXT:    retq
56;
57; AVX2-LABEL: shuffle_v4f64_0020:
58; AVX2:       # %bb.0:
59; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
60; AVX2-NEXT:    retq
61;
62; AVX512VL-LABEL: shuffle_v4f64_0020:
63; AVX512VL:       # %bb.0:
64; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
65; AVX512VL-NEXT:    retq
66  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
67  ret <4 x double> %shuffle
68}
69
; Unary shuffle of %a with mask <0,3,0,0>; %b is unused.
; AVX1 is checked for three instructions (vperm2f128, vinsertf128, vshufpd);
; AVX2 and AVX512VL for a single vpermpd.
70define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
71; AVX1-LABEL: shuffle_v4f64_0300:
72; AVX1:       # %bb.0:
73; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
74; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
75; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
76; AVX1-NEXT:    retq
77;
78; AVX2-LABEL: shuffle_v4f64_0300:
79; AVX2:       # %bb.0:
80; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
81; AVX2-NEXT:    retq
82;
83; AVX512VL-LABEL: shuffle_v4f64_0300:
84; AVX512VL:       # %bb.0:
85; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
86; AVX512VL-NEXT:    retq
87  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
88  ret <4 x double> %shuffle
89}
90
; Unary shuffle of %a with mask <1,0,0,0>; %b is unused.
; AVX1 is checked for vinsertf128 + vshufpd, AVX2 and AVX512VL for a single vpermpd.
91define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
92; AVX1-LABEL: shuffle_v4f64_1000:
93; AVX1:       # %bb.0:
94; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
95; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
96; AVX1-NEXT:    retq
97;
98; AVX2-LABEL: shuffle_v4f64_1000:
99; AVX2:       # %bb.0:
100; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
101; AVX2-NEXT:    retq
102;
103; AVX512VL-LABEL: shuffle_v4f64_1000:
104; AVX512VL:       # %bb.0:
105; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
106; AVX512VL-NEXT:    retq
107  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
108  ret <4 x double> %shuffle
109}
110
; Unary shuffle of %a with mask <2,2,0,0>; %b is unused.
; AVX1 is checked for vperm2f128 + vmovddup, AVX2 and AVX512VL for a single vpermpd.
111define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
112; AVX1-LABEL: shuffle_v4f64_2200:
113; AVX1:       # %bb.0:
114; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
115; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
116; AVX1-NEXT:    retq
117;
118; AVX2-LABEL: shuffle_v4f64_2200:
119; AVX2:       # %bb.0:
120; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
121; AVX2-NEXT:    retq
122;
123; AVX512VL-LABEL: shuffle_v4f64_2200:
124; AVX512VL:       # %bb.0:
125; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
126; AVX512VL-NEXT:    retq
127  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
128  ret <4 x double> %shuffle
129}
130
; Splat test: mask <2,2,2,2> copies element 2 of %a into every lane; %b is unused.
; AVX1 is checked for vperm2f128 + vmovddup, AVX2 and AVX512VL for a single vpermpd.
131define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
132; AVX1-LABEL: shuffle_v4f64_2222:
133; AVX1:       # %bb.0:
134; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
135; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
136; AVX1-NEXT:    retq
137;
138; AVX2-LABEL: shuffle_v4f64_2222:
139; AVX2:       # %bb.0:
140; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
141; AVX2-NEXT:    retq
142;
143; AVX512VL-LABEL: shuffle_v4f64_2222:
144; AVX512VL:       # %bb.0:
145; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
146; AVX512VL-NEXT:    retq
147  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
148  ret <4 x double> %shuffle
149}
150
; Same <2,2,2,2> splat as shuffle_v4f64_2222, but the inputs arrive as <4 x i64>
; and are bitcast to <4 x double> before the shuffle. The checked instruction
; sequences are identical to the non-bitcast variant.
151define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
152; AVX1-LABEL: shuffle_v4f64_2222_bc:
153; AVX1:       # %bb.0:
154; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
155; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
156; AVX1-NEXT:    retq
157;
158; AVX2-LABEL: shuffle_v4f64_2222_bc:
159; AVX2:       # %bb.0:
160; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
161; AVX2-NEXT:    retq
162;
163; AVX512VL-LABEL: shuffle_v4f64_2222_bc:
164; AVX512VL:       # %bb.0:
165; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
166; AVX512VL-NEXT:    retq
167  %tmp0 = bitcast <4 x i64> %a to <4 x double>
168  %tmp1 = bitcast <4 x i64> %b to <4 x double>
169  %shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
170  ret <4 x double> %shuffle
171}
172
; Unary shuffle of %a with mask <2,2,3,3>; %b is unused.
; AVX1 is checked for vperm2f128 + vshufpd, AVX2 and AVX512VL for a single vpermpd.
173define <4 x double> @shuffle_v4f64_2233(<4 x double> %a, <4 x double> %b) {
174; AVX1-LABEL: shuffle_v4f64_2233:
175; AVX1:       # %bb.0:
176; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
177; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
178; AVX1-NEXT:    retq
179;
180; AVX2-LABEL: shuffle_v4f64_2233:
181; AVX2:       # %bb.0:
182; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
183; AVX2-NEXT:    retq
184;
185; AVX512VL-LABEL: shuffle_v4f64_2233:
186; AVX512VL:       # %bb.0:
187; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
188; AVX512VL-NEXT:    retq
189  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
190  ret <4 x double> %shuffle
191}
192
; Unary shuffle of %a with mask <3,3,3,0>; %b is unused.
; AVX1 is checked for two vperm2f128 plus a vshufpd; AVX2 and AVX512VL for a single vpermpd.
193define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
194; AVX1-LABEL: shuffle_v4f64_3330:
195; AVX1:       # %bb.0:
196; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
197; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
198; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2]
199; AVX1-NEXT:    retq
200;
201; AVX2-LABEL: shuffle_v4f64_3330:
202; AVX2:       # %bb.0:
203; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
204; AVX2-NEXT:    retq
205;
206; AVX512VL-LABEL: shuffle_v4f64_3330:
207; AVX512VL:       # %bb.0:
208; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
209; AVX512VL-NEXT:    retq
210  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
211  ret <4 x double> %shuffle
212}
213
; Full element reversal of %a (mask <3,2,1,0>); %b is unused.
; AVX1 is checked for vperm2f128 + vshufpd, AVX2 and AVX512VL for a single vpermpd.
214define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
215; AVX1-LABEL: shuffle_v4f64_3210:
216; AVX1:       # %bb.0:
217; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
218; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
219; AVX1-NEXT:    retq
220;
221; AVX2-LABEL: shuffle_v4f64_3210:
222; AVX2:       # %bb.0:
223; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
224; AVX2-NEXT:    retq
225;
226; AVX512VL-LABEL: shuffle_v4f64_3210:
227; AVX512VL:       # %bb.0:
228; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
229; AVX512VL-NEXT:    retq
230  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
231  ret <4 x double> %shuffle
232}
233
; Unary shuffle of %a with mask <0,0,2,3>; %b is unused.
; Every run line shares one check block expecting a single vshufpd.
234define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
235; ALL-LABEL: shuffle_v4f64_0023:
236; ALL:       # %bb.0:
237; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
238; ALL-NEXT:    retq
239
240  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
241  ret <4 x double> %shuffle
242}
243
; Unary shuffle of %a with mask <0,0,2,2> (duplicate the even element of each
; two-element half); %b is unused. Every run line expects a single vmovddup.
244define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
245; ALL-LABEL: shuffle_v4f64_0022:
246; ALL:       # %bb.0:
247; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
248; ALL-NEXT:    retq
249  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
250  ret <4 x double> %shuffle
251}
252
; Same <0,0,2,2> mask as shuffle_v4f64_0022, but the shuffled vector is loaded
; from %ptr. The checks expect a vmovddup with a memory source operand and no
; separate load instruction.
253define <4 x double> @shuffle_v4f64mem_0022(ptr %ptr, <4 x double> %b) {
254; ALL-LABEL: shuffle_v4f64mem_0022:
255; ALL:       # %bb.0:
256; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
257; ALL-NEXT:    retq
258  %a = load  <4 x double>,  ptr %ptr
259  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
260  ret <4 x double> %shuffle
261}
262
; Unary shuffle of %a with mask <1,0,3,2> (swap the two elements of each
; two-element half); %b is unused. Every run line expects a single vshufpd.
263define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
264; ALL-LABEL: shuffle_v4f64_1032:
265; ALL:       # %bb.0:
266; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
267; ALL-NEXT:    retq
268  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
269  ret <4 x double> %shuffle
270}
271
; Unary shuffle of %a with mask <1,1,3,3> (duplicate the odd element of each
; two-element half); %b is unused. Every run line expects a single vshufpd.
272define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
273; ALL-LABEL: shuffle_v4f64_1133:
274; ALL:       # %bb.0:
275; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
276; ALL-NEXT:    retq
277  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
278  ret <4 x double> %shuffle
279}
280
; Unary shuffle of %a with mask <1,0,2,3> (swap only the lower two elements);
; %b is unused. Every run line expects a single vshufpd.
281define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
282; ALL-LABEL: shuffle_v4f64_1023:
283; ALL:       # %bb.0:
284; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
285; ALL-NEXT:    retq
286  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
287  ret <4 x double> %shuffle
288}
289
; Unary shuffle of %a with mask <1,0,2,2>; %b is unused.
; Every run line expects a single vshufpd.
290define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
291; ALL-LABEL: shuffle_v4f64_1022:
292; ALL:       # %bb.0:
293; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
294; ALL-NEXT:    retq
295  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
296  ret <4 x double> %shuffle
297}
298
; Unary shuffle of %a with mask <0,2,1,3> (swap the two middle elements); %b is unused.
; AVX1 is checked for three instructions (vperm2f128, vinsertf128, vshufpd);
; AVX2 and AVX512VL for a single vpermpd.
299define <4 x double> @shuffle_v4f64_0213(<4 x double> %a, <4 x double> %b) {
300; AVX1-LABEL: shuffle_v4f64_0213:
301; AVX1:       # %bb.0:
302; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
303; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
304; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
305; AVX1-NEXT:    retq
306;
307; AVX2-LABEL: shuffle_v4f64_0213:
308; AVX2:       # %bb.0:
309; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
310; AVX2-NEXT:    retq
311;
312; AVX512VL-LABEL: shuffle_v4f64_0213:
313; AVX512VL:       # %bb.0:
314; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
315; AVX512VL-NEXT:    retq
316  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
317  ret <4 x double> %shuffle
318}
319
; Two-input shuffle, mask <0,4,2,3>: lane 1 takes element 0 of %b (index 4),
; the other lanes keep %a. The AVX512VL-FAST runs are checked for vpermt2pd
; with a constant index vector; the remaining runs for vmovddup + vblendps.
320define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
321; AVX1OR2-LABEL: shuffle_v4f64_0423:
322; AVX1OR2:       # %bb.0:
323; AVX1OR2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
324; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
325; AVX1OR2-NEXT:    retq
326;
327; AVX512VL-SLOW-LABEL: shuffle_v4f64_0423:
328; AVX512VL-SLOW:       # %bb.0:
329; AVX512VL-SLOW-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
330; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
331; AVX512VL-SLOW-NEXT:    retq
332;
333; AVX512VL-FAST-LABEL: shuffle_v4f64_0423:
334; AVX512VL-FAST:       # %bb.0:
335; AVX512VL-FAST-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,2,3]
336; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
337; AVX512VL-FAST-NEXT:    retq
338  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
339  ret <4 x double> %shuffle
340}
341
; Two-input shuffle, mask <0,4,6,2>: result lanes are a0, b0, b2, a2
; (indices 4-7 select from %b). AVX1/AVX2 are checked for two vblendps plus
; vunpcklpd; AVX512VL for vpermt2pd with a constant index vector.
342define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
343; AVX1OR2-LABEL: shuffle_v4f64_0462:
344; AVX1OR2:       # %bb.0:
345; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
346; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
347; AVX1OR2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
348; AVX1OR2-NEXT:    retq
349;
350; AVX512VL-LABEL: shuffle_v4f64_0462:
351; AVX512VL:       # %bb.0:
352; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,6,2]
353; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
354; AVX512VL-NEXT:    retq
355  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
356  ret <4 x double> %shuffle
357}
358
; Two-input shuffle, mask <0,4,2,6>: interleaves the even elements of %a and %b
; (a0, b0, a2, b2). Every run line expects a single vunpcklpd.
359define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
360; ALL-LABEL: shuffle_v4f64_0426:
361; ALL:       # %bb.0:
362; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
363; ALL-NEXT:    retq
364  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
365  ret <4 x double> %shuffle
366}
367
; Two-input shuffle, mask <1,5,3,7>: interleaves the odd elements of %a and %b
; (a1, b1, a3, b3). Every run line expects a single vunpckhpd.
368define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
369; ALL-LABEL: shuffle_v4f64_1537:
370; ALL:       # %bb.0:
371; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
372; ALL-NEXT:    retq
373  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
374  ret <4 x double> %shuffle
375}
376
; Two-input shuffle, mask <4,0,6,2>: even elements interleaved with %b first
; (b0, a0, b2, a2). Every run line expects a single vunpcklpd with the
; operands in swapped order.
377define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
378; ALL-LABEL: shuffle_v4f64_4062:
379; ALL:       # %bb.0:
380; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
381; ALL-NEXT:    retq
382  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
383  ret <4 x double> %shuffle
384}
385
; Two-input shuffle, mask <5,1,7,3>: odd elements interleaved with %b first
; (b1, a1, b3, a3). Every run line expects a single vunpckhpd with the
; operands in swapped order.
386define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
387; ALL-LABEL: shuffle_v4f64_5173:
388; ALL:       # %bb.0:
389; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
390; ALL-NEXT:    retq
391  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
392  ret <4 x double> %shuffle
393}
394
; Two-input shuffle, mask <5,1,6,3>: result lanes are b1, a1, b2, a3.
; Every run line expects a single two-operand vshufpd.
395define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
396; ALL-LABEL: shuffle_v4f64_5163:
397; ALL:       # %bb.0:
398; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
399; ALL-NEXT:    retq
400  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
401  ret <4 x double> %shuffle
402}
403
; Two-input shuffle, mask <0,5,2,7>: each lane keeps its position, even lanes
; come from %a and odd lanes from %b. Every run line expects a single vblendps.
404define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
405; ALL-LABEL: shuffle_v4f64_0527:
406; ALL:       # %bb.0:
407; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
408; ALL-NEXT:    retq
409  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
410  ret <4 x double> %shuffle
411}
412
; Two-input shuffle, mask <4,1,6,3>: each lane keeps its position, even lanes
; come from %b and odd lanes from %a. Every run line expects a single vblendps.
413define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
414; ALL-LABEL: shuffle_v4f64_4163:
415; ALL:       # %bb.0:
416; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
417; ALL-NEXT:    retq
418  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
419  ret <4 x double> %shuffle
420}
421
; Two-input shuffle, mask <0,1,4,5>: lower two elements of %a followed by the
; lower two elements of %b. Every run line expects a single vinsertf128.
422define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
423; ALL-LABEL: shuffle_v4f64_0145:
424; ALL:       # %bb.0:
425; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
426; ALL-NEXT:    retq
427  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
428  ret <4 x double> %shuffle
429}
430
; Two-input shuffle, mask <4,5,0,1>: lower two elements of %b followed by the
; lower two elements of %a. Every run line expects a single vinsertf128.
431define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
432; ALL-LABEL: shuffle_v4f64_4501:
433; ALL:       # %bb.0:
434; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
435; ALL-NEXT:    retq
436  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
437  ret <4 x double> %shuffle
438}
439
; Two-input shuffle, mask <0,1,6,7>: lower two elements from %a, upper two
; elements from %b, all in place. Every run line expects a single vblendps.
440define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
441; ALL-LABEL: shuffle_v4f64_0167:
442; ALL:       # %bb.0:
443; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
444; ALL-NEXT:    retq
445  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
446  ret <4 x double> %shuffle
447}
448
; Two-input shuffle, mask <1,0,5,4>: result lanes are a1, a0, b1, b0.
; Only the run with both fast-variable-crosslane-shuffle and
; fast-variable-perlane-shuffle is checked for vpermt2pd; every other run is
; checked for vinsertf128 + vshufpd.
449define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
450; AVX1OR2-LABEL: shuffle_v4f64_1054:
451; AVX1OR2:       # %bb.0:
452; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
453; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
454; AVX1OR2-NEXT:    retq
455;
456; AVX512VL-SLOW-LABEL: shuffle_v4f64_1054:
457; AVX512VL-SLOW:       # %bb.0:
458; AVX512VL-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
459; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
460; AVX512VL-SLOW-NEXT:    retq
461;
462; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_1054:
463; AVX512VL-FAST-ALL:       # %bb.0:
464; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,5,4]
465; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
466; AVX512VL-FAST-ALL-NEXT:    retq
467;
468; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_1054:
469; AVX512VL-FAST-PERLANE:       # %bb.0:
470; AVX512VL-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
471; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
472; AVX512VL-FAST-PERLANE-NEXT:    retq
473  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
474  ret <4 x double> %shuffle
475}
476
; Two-input shuffle, mask <3,2,5,4>: result lanes are a3, a2, b1, b0.
; Only the run with both fast-variable-crosslane-shuffle and
; fast-variable-perlane-shuffle is checked for vpermt2pd; every other run is
; checked for vperm2f128 + vshufpd.
477define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
478; AVX1OR2-LABEL: shuffle_v4f64_3254:
479; AVX1OR2:       # %bb.0:
480; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
481; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
482; AVX1OR2-NEXT:    retq
483;
484; AVX512VL-SLOW-LABEL: shuffle_v4f64_3254:
485; AVX512VL-SLOW:       # %bb.0:
486; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
487; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
488; AVX512VL-SLOW-NEXT:    retq
489;
490; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_3254:
491; AVX512VL-FAST-ALL:       # %bb.0:
492; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,5,4]
493; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
494; AVX512VL-FAST-ALL-NEXT:    retq
495;
496; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_3254:
497; AVX512VL-FAST-PERLANE:       # %bb.0:
498; AVX512VL-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
499; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
500; AVX512VL-FAST-PERLANE-NEXT:    retq
501  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
502  ret <4 x double> %shuffle
503}
504
; Two-input shuffle, mask <3,2,7,6>: result lanes are a3, a2, b3, b2.
; Only the run with both fast-variable-crosslane-shuffle and
; fast-variable-perlane-shuffle is checked for vpermt2pd; every other run is
; checked for vperm2f128 + vshufpd.
505define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
506; AVX1OR2-LABEL: shuffle_v4f64_3276:
507; AVX1OR2:       # %bb.0:
508; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
509; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
510; AVX1OR2-NEXT:    retq
511;
512; AVX512VL-SLOW-LABEL: shuffle_v4f64_3276:
513; AVX512VL-SLOW:       # %bb.0:
514; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
515; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
516; AVX512VL-SLOW-NEXT:    retq
517;
518; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_3276:
519; AVX512VL-FAST-ALL:       # %bb.0:
520; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,7,6]
521; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
522; AVX512VL-FAST-ALL-NEXT:    retq
523;
524; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_3276:
525; AVX512VL-FAST-PERLANE:       # %bb.0:
526; AVX512VL-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
527; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
528; AVX512VL-FAST-PERLANE-NEXT:    retq
529  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
530  ret <4 x double> %shuffle
531}
532
; Two-input shuffle, mask <1,0,7,6>: result lanes are a1, a0, b3, b2.
; Both AVX512VL fast-shuffle runs are checked for vpermt2pd with a constant
; index vector; the remaining runs for vblendpd + vshufpd.
533define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
534; AVX1OR2-LABEL: shuffle_v4f64_1076:
535; AVX1OR2:       # %bb.0:
536; AVX1OR2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
537; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
538; AVX1OR2-NEXT:    retq
539;
540; AVX512VL-SLOW-LABEL: shuffle_v4f64_1076:
541; AVX512VL-SLOW:       # %bb.0:
542; AVX512VL-SLOW-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
543; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
544; AVX512VL-SLOW-NEXT:    retq
545;
546; AVX512VL-FAST-LABEL: shuffle_v4f64_1076:
547; AVX512VL-FAST:       # %bb.0:
548; AVX512VL-FAST-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,7,6]
549; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
550; AVX512VL-FAST-NEXT:    retq
551  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
552  ret <4 x double> %shuffle
553}
554
; Two-input shuffle, mask <0,4,1,5>: interleaves the lower two elements of %a
; and %b (a0, b0, a1, b1). AVX1 is checked for vunpckhpd + vmovlhps +
; vinsertf128, AVX2 for two vpermpd plus a vblendps, and AVX512VL for
; vpermt2pd with a constant index vector.
555define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
556; AVX1-LABEL: shuffle_v4f64_0415:
557; AVX1:       # %bb.0:
558; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
559; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
560; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
561; AVX1-NEXT:    retq
562;
563; AVX2-LABEL: shuffle_v4f64_0415:
564; AVX2:       # %bb.0:
565; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
566; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
567; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
568; AVX2-NEXT:    retq
569;
570; AVX512VL-LABEL: shuffle_v4f64_0415:
571; AVX512VL:       # %bb.0:
572; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,1,5]
573; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
574; AVX512VL-NEXT:    retq
575  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
576  ret <4 x double> %shuffle
577}
578
; Two-input shuffle, mask <2,7,4,1>: result lanes are a2, b3, b0, a1.
; AVX1/AVX2 are checked for two vperm2f128 plus a vblendps; AVX512VL for
; vpermt2pd with a constant index vector.
579define <4 x double> @shuffle_v4f64_2741(<4 x double> %a, <4 x double> %b) {
580; AVX1OR2-LABEL: shuffle_v4f64_2741:
581; AVX1OR2:       # %bb.0:
582; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[0,1]
583; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
584; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
585; AVX1OR2-NEXT:    retq
586;
587; AVX512VL-LABEL: shuffle_v4f64_2741:
588; AVX512VL:       # %bb.0:
589; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [2,7,4,1]
590; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
591; AVX512VL-NEXT:    retq
592  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 7, i32 4, i32 1>
593  ret <4 x double> %shuffle
594}
595
; Two-input shuffle, mask <poison,0,6,2>: lane 0 is unconstrained. The checks
; expect the same vunpcklpd as shuffle_v4f64_4062.
596define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
597; ALL-LABEL: shuffle_v4f64_u062:
598; ALL:       # %bb.0:
599; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
600; ALL-NEXT:    retq
601  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 poison, i32 0, i32 6, i32 2>
602  ret <4 x double> %shuffle
603}
604
; Two-input shuffle, mask <1,5,poison,poison>: only the lower two lanes are
; constrained. Every run line expects a single vunpckhpd on xmm registers.
605define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
606; ALL-LABEL: shuffle_v4f64_15uu:
607; ALL:       # %bb.0:
608; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
609; ALL-NEXT:    retq
610  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
611  ret <4 x double> %shuffle
612}
613
; Unary shuffle of %a, mask <1,1,poison,poison>: only the lower two lanes are
; constrained; %b is unused. Every run line expects a single vshufpd on xmm0.
614define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
615; ALL-LABEL: shuffle_v4f64_11uu:
616; ALL:       # %bb.0:
617; ALL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,1]
618; ALL-NEXT:    retq
619  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 poison, i32 poison>
620  ret <4 x double> %shuffle
621}
622
; Unary shuffle of %a, mask <2,2,poison,poison>: the lower two lanes take
; element 2 and the upper lanes are unconstrained; %b is unused.
; AVX1 is checked for vextractf128 + vmovddup on xmm; AVX2 and AVX512VL for a
; vpermpd that splats element 2 into all four lanes.
623define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
624; AVX1-LABEL: shuffle_v4f64_22uu:
625; AVX1:       # %bb.0:
626; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
627; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
628; AVX1-NEXT:    retq
629;
630; AVX2-LABEL: shuffle_v4f64_22uu:
631; AVX2:       # %bb.0:
632; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
633; AVX2-NEXT:    retq
634;
635; AVX512VL-LABEL: shuffle_v4f64_22uu:
636; AVX512VL:       # %bb.0:
637; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
638; AVX512VL-NEXT:    retq
639  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 poison, i32 poison>
640  ret <4 x double> %shuffle
641}
642
; Splat test: mask <3,3,3,3> copies element 3 of %a into every lane; %b is unused.
; AVX1 is checked for vperm2f128 + vshufpd, AVX2 and AVX512VL for a single vpermpd.
643define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
644; AVX1-LABEL: shuffle_v4f64_3333:
645; AVX1:       # %bb.0:
646; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
647; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
648; AVX1-NEXT:    retq
649;
650; AVX2-LABEL: shuffle_v4f64_3333:
651; AVX2:       # %bb.0:
652; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
653; AVX2-NEXT:    retq
654;
655; AVX512VL-LABEL: shuffle_v4f64_3333:
656; AVX512VL:       # %bb.0:
657; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
658; AVX512VL-NEXT:    retq
659  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
660  ret <4 x double> %shuffle
661}
662
; Two-input shuffle, mask <0,4,5,6>: result lanes are a0, b0, b1, b2.
; Only the run with both fast-variable-crosslane-shuffle and
; fast-variable-perlane-shuffle is checked for vpermi2pd (followed by a
; vmovapd that copies the result into ymm0); every other run is checked for
; vinsertf128 + vshufpd.
663define <4 x double> @shuffle_v4f64_0456(<4 x double> %a, <4 x double> %b) {
664; AVX1OR2-LABEL: shuffle_v4f64_0456:
665; AVX1OR2:       # %bb.0:
666; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
667; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
668; AVX1OR2-NEXT:    retq
669;
670; AVX512VL-SLOW-LABEL: shuffle_v4f64_0456:
671; AVX512VL-SLOW:       # %bb.0:
672; AVX512VL-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
673; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
674; AVX512VL-SLOW-NEXT:    retq
675;
676; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_0456:
677; AVX512VL-FAST-ALL:       # %bb.0:
678; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [4,0,1,2]
679; AVX512VL-FAST-ALL-NEXT:    vpermi2pd %ymm0, %ymm1, %ymm2
680; AVX512VL-FAST-ALL-NEXT:    vmovapd %ymm2, %ymm0
681; AVX512VL-FAST-ALL-NEXT:    retq
682;
683; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_0456:
684; AVX512VL-FAST-PERLANE:       # %bb.0:
685; AVX512VL-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
686; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
687; AVX512VL-FAST-PERLANE-NEXT:    retq
688  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
689  ret <4 x double> %shuffle
690}
691
692; PR59860
; Two-input shuffle, mask <0,4,3,7>: result lanes are a0, b0, a3, b3.
; Every run line expects a single two-operand vshufpd.
693define <4 x double> @shuffle_v4f64_0437(<4 x double> %a, <4 x double> %b) {
694; ALL-LABEL: shuffle_v4f64_0437:
695; ALL:       # %bb.0:
696; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
697; ALL-NEXT:    retq
698  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
699  ret <4 x double> %shuffle
700}
701
702; PR91433
; Unary shuffle of %a with mask <2,3,0,3>; the second shuffle operand is poison.
; AVX1 is checked for two vperm2f128 plus a vblendps; AVX2 and AVX512VL for a
; single vpermpd.
703define <4 x double> @shuffle_v4f64_2303(<4 x double> %a) {
704; AVX1-LABEL: shuffle_v4f64_2303:
705; AVX1:       # %bb.0:
706; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
707; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
708; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
709; AVX1-NEXT:    retq
710;
711; AVX2-LABEL: shuffle_v4f64_2303:
712; AVX2:       # %bb.0:
713; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
714; AVX2-NEXT:    retq
715;
716; AVX512VL-LABEL: shuffle_v4f64_2303:
717; AVX512VL:       # %bb.0:
718; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
719; AVX512VL-NEXT:    retq
720  %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
721  ret <4 x double> %shuffle
722}
723
; Shuffle of %a against a constant whose element 0 is 0.0 (other elements
; poison), mask <0,4,3,4>: lanes 1 and 3 read that 0.0, lanes 0 and 2 take a0
; and a3. %b is unused. Every run line expects vxorpd (zeroing a register)
; followed by a two-operand vshufpd.
724define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
725; ALL-LABEL: shuffle_v4f64_0z3z:
726; ALL:       # %bb.0:
727; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
728; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
729; ALL-NEXT:    retq
730  %shuffle = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
731  ret <4 x double> %shuffle
732}
733
; Shuffle of %a against a constant whose element 0 is 0.0 (other elements
; poison), mask <1,4,2,4>: lanes 1 and 3 read that 0.0, lanes 0 and 2 take a1
; and a2. %b is unused. Every run line expects vxorpd (zeroing a register)
; followed by a two-operand vshufpd.
734define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
735; ALL-LABEL: shuffle_v4f64_1z2z:
736; ALL:       # %bb.0:
737; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
738; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[3]
739; ALL-NEXT:    retq
740  %1 = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
741  ret <4 x double> %1
742}
743
; Two-input shuffle, mask <0,0,4,4>: result lanes are a0, a0, b0, b0.
; AVX1 is checked for vinsertf128 + vmovddup. AVX2 and most AVX512VL runs are
; checked for vmovlhps + vpermpd; only the run with both
; fast-variable-crosslane-shuffle and fast-variable-perlane-shuffle is checked
; for vpermt2pd.
744define <4 x double> @shuffle_v4f64_0044(<4 x double> %a, <4 x double> %b) {
745; AVX1-LABEL: shuffle_v4f64_0044:
746; AVX1:       # %bb.0:
747; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
748; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
749; AVX1-NEXT:    retq
750;
751; AVX2-LABEL: shuffle_v4f64_0044:
752; AVX2:       # %bb.0:
753; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
754; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
755; AVX2-NEXT:    retq
756;
757; AVX512VL-SLOW-LABEL: shuffle_v4f64_0044:
758; AVX512VL-SLOW:       # %bb.0:
759; AVX512VL-SLOW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
760; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
761; AVX512VL-SLOW-NEXT:    retq
762;
763; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_0044:
764; AVX512VL-FAST-ALL:       # %bb.0:
765; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,0,4,4]
766; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
767; AVX512VL-FAST-ALL-NEXT:    retq
768;
769; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_0044:
770; AVX512VL-FAST-PERLANE:       # %bb.0:
771; AVX512VL-FAST-PERLANE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
772; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
773; AVX512VL-FAST-PERLANE-NEXT:    retq
774  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
775  ret <4 x double> %1
776}
777
; Builds a[0],a[0],b[0],b[0] from <2 x double> inputs - each input is splatted
; from its element 0 (mask <0,0>), then the two halves are concatenated (mask <0,1,2,3>).
778define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b) {
779; ALL-LABEL: shuffle_v4f64_0044_v2f64:
780; ALL:       # %bb.0:
781; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
782; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
783; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
784; ALL-NEXT:    retq
785  %1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 0, i32 0>
786  %2 = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> <i32 0, i32 0>
787  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
788  ret <4 x double> %3
789}
790
; Builds a[1],a[0],b[1],b[0] from <2 x double> inputs - each input has its two
; elements swapped (mask <1,0>), then the two halves are concatenated (mask <0,1,2,3>).
791define <4 x double> @shuffle_v4f64_1032_v2f64(<2 x double> %a, <2 x double> %b) {
792; ALL-LABEL: shuffle_v4f64_1032_v2f64:
793; ALL:       # %bb.0:
794; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
795; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
796; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
797; ALL-NEXT:    retq
798  %1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
799  %2 = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> <i32 1, i32 0>
800  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
801  ret <4 x double> %3
802}
803
804; PR34359
; Shuffle followed by a constant-mask select. %shuf is vec1[2],vec1[3],vec2[0],vec2[1];
; the select then keeps lane 0 from %vec3 and lanes 1-3 from %shuf.
805define <4 x double> @shuffle_v4f64_2345_0567_select(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3) {
806; ALL-LABEL: shuffle_v4f64_2345_0567_select:
807; ALL:       # %bb.0:
808; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
809; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
810; ALL-NEXT:    retq
811  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
812  %res = select <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x double> %shuf, <4 x double> %vec3
813  ret <4 x double> %res
814}
815
; Mask <0,0,0,0> - every lane is a[0] (a splat). No index reaches %b.
816define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
817; AVX1-LABEL: shuffle_v4i64_0000:
818; AVX1:       # %bb.0:
819; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
820; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
821; AVX1-NEXT:    retq
822;
823; AVX2-LABEL: shuffle_v4i64_0000:
824; AVX2:       # %bb.0:
825; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
826; AVX2-NEXT:    retq
827;
828; AVX512VL-LABEL: shuffle_v4i64_0000:
829; AVX512VL:       # %bb.0:
830; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
831; AVX512VL-NEXT:    retq
832  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
833  ret <4 x i64> %shuffle
834}
835
; Mask <0,0,0,1> - selects a[0],a[0],a[0],a[1]. No index reaches %b.
836define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
837; AVX1-LABEL: shuffle_v4i64_0001:
838; AVX1:       # %bb.0:
839; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,1,0,1]
840; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
841; AVX1-NEXT:    retq
842;
843; AVX2-LABEL: shuffle_v4i64_0001:
844; AVX2:       # %bb.0:
845; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
846; AVX2-NEXT:    retq
847;
848; AVX512VL-LABEL: shuffle_v4i64_0001:
849; AVX512VL:       # %bb.0:
850; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
851; AVX512VL-NEXT:    retq
852  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
853  ret <4 x i64> %shuffle
854}
855
; Mask <0,0,2,0> - selects a[0],a[0],a[2],a[0]. No index reaches %b.
856define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
857; AVX1-LABEL: shuffle_v4i64_0020:
858; AVX1:       # %bb.0:
859; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
860; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
861; AVX1-NEXT:    retq
862;
863; AVX2-LABEL: shuffle_v4i64_0020:
864; AVX2:       # %bb.0:
865; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
866; AVX2-NEXT:    retq
867;
868; AVX512VL-LABEL: shuffle_v4i64_0020:
869; AVX512VL:       # %bb.0:
870; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
871; AVX512VL-NEXT:    retq
872  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
873  ret <4 x i64> %shuffle
874}
875
; Mask <0,1,1,2> - selects a[0],a[1],a[1],a[2]. No index reaches %b.
876define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
877; AVX1-LABEL: shuffle_v4i64_0112:
878; AVX1:       # %bb.0:
879; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
880; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[3],ymm0[2]
881; AVX1-NEXT:    retq
882;
883; AVX2-LABEL: shuffle_v4i64_0112:
884; AVX2:       # %bb.0:
885; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
886; AVX2-NEXT:    retq
887;
888; AVX512VL-LABEL: shuffle_v4i64_0112:
889; AVX512VL:       # %bb.0:
890; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
891; AVX512VL-NEXT:    retq
892  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
893  ret <4 x i64> %shuffle
894}
895
; Mask <0,3,0,0> - selects a[0],a[3],a[0],a[0]. No index reaches %b.
896define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
897; AVX1-LABEL: shuffle_v4i64_0300:
898; AVX1:       # %bb.0:
899; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
900; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
901; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
902; AVX1-NEXT:    retq
903;
904; AVX2-LABEL: shuffle_v4i64_0300:
905; AVX2:       # %bb.0:
906; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
907; AVX2-NEXT:    retq
908;
909; AVX512VL-LABEL: shuffle_v4i64_0300:
910; AVX512VL:       # %bb.0:
911; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
912; AVX512VL-NEXT:    retq
913  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
914  ret <4 x i64> %shuffle
915}
916
; Mask <1,0,0,0> - selects a[1],a[0],a[0],a[0]. No index reaches %b.
917define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
918; AVX1-LABEL: shuffle_v4i64_1000:
919; AVX1:       # %bb.0:
920; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
921; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
922; AVX1-NEXT:    retq
923;
924; AVX2-LABEL: shuffle_v4i64_1000:
925; AVX2:       # %bb.0:
926; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
927; AVX2-NEXT:    retq
928;
929; AVX512VL-LABEL: shuffle_v4i64_1000:
930; AVX512VL:       # %bb.0:
931; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
932; AVX512VL-NEXT:    retq
933  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
934  ret <4 x i64> %shuffle
935}
936
; Mask <2,2,0,0> - selects a[2],a[2],a[0],a[0]. No index reaches %b.
937define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
938; AVX1-LABEL: shuffle_v4i64_2200:
939; AVX1:       # %bb.0:
940; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
941; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
942; AVX1-NEXT:    retq
943;
944; AVX2-LABEL: shuffle_v4i64_2200:
945; AVX2:       # %bb.0:
946; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
947; AVX2-NEXT:    retq
948;
949; AVX512VL-LABEL: shuffle_v4i64_2200:
950; AVX512VL:       # %bb.0:
951; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
952; AVX512VL-NEXT:    retq
953  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
954  ret <4 x i64> %shuffle
955}
956
; Mask <3,3,3,0> - selects a[3],a[3],a[3],a[0]. No index reaches %b.
957define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
958; AVX1-LABEL: shuffle_v4i64_3330:
959; AVX1:       # %bb.0:
960; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
961; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
962; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2]
963; AVX1-NEXT:    retq
964;
965; AVX2-LABEL: shuffle_v4i64_3330:
966; AVX2:       # %bb.0:
967; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
968; AVX2-NEXT:    retq
969;
970; AVX512VL-LABEL: shuffle_v4i64_3330:
971; AVX512VL:       # %bb.0:
972; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
973; AVX512VL-NEXT:    retq
974  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
975  ret <4 x i64> %shuffle
976}
977
; Mask <3,2,1,0> - full element reversal of %a. No index reaches %b.
978define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
979; AVX1-LABEL: shuffle_v4i64_3210:
980; AVX1:       # %bb.0:
981; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
982; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
983; AVX1-NEXT:    retq
984;
985; AVX2-LABEL: shuffle_v4i64_3210:
986; AVX2:       # %bb.0:
987; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
988; AVX2-NEXT:    retq
989;
990; AVX512VL-LABEL: shuffle_v4i64_3210:
991; AVX512VL:       # %bb.0:
992; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
993; AVX512VL-NEXT:    retq
994  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
995  ret <4 x i64> %shuffle
996}
997
; Mask <0,2,1,3> - swaps the two middle elements of %a. No index reaches %b.
998define <4 x i64> @shuffle_v4i64_0213(<4 x i64> %a, <4 x i64> %b) {
999; AVX1-LABEL: shuffle_v4i64_0213:
1000; AVX1:       # %bb.0:
1001; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
1002; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1003; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
1004; AVX1-NEXT:    retq
1005;
1006; AVX2-LABEL: shuffle_v4i64_0213:
1007; AVX2:       # %bb.0:
1008; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1009; AVX2-NEXT:    retq
1010;
1011; AVX512VL-LABEL: shuffle_v4i64_0213:
1012; AVX512VL:       # %bb.0:
1013; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1014; AVX512VL-NEXT:    retq
1015  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1016  ret <4 x i64> %shuffle
1017}
1018
; Mask <0,1,2,4> - keeps a[0],a[1],a[2] in place and puts b[0] in the last lane.
1019define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
1020; AVX1-LABEL: shuffle_v4i64_0124:
1021; AVX1:       # %bb.0:
1022; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
1023; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
1024; AVX1-NEXT:    retq
1025;
1026; AVX2-LABEL: shuffle_v4i64_0124:
1027; AVX2:       # %bb.0:
1028; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
1029; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
1030; AVX2-NEXT:    retq
1031;
1032; AVX512VL-SLOW-LABEL: shuffle_v4i64_0124:
1033; AVX512VL-SLOW:       # %bb.0:
1034; AVX512VL-SLOW-NEXT:    vbroadcastsd %xmm1, %ymm1
1035; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
1036; AVX512VL-SLOW-NEXT:    retq
1037;
1038; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_0124:
1039; AVX512VL-FAST-ALL:       # %bb.0:
1040; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,1,2,4]
1041; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1042; AVX512VL-FAST-ALL-NEXT:    retq
1043;
1044; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_0124:
1045; AVX512VL-FAST-PERLANE:       # %bb.0:
1046; AVX512VL-FAST-PERLANE-NEXT:    vbroadcastsd %xmm1, %ymm1
1047; AVX512VL-FAST-PERLANE-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
1048; AVX512VL-FAST-PERLANE-NEXT:    retq
1049  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
1050  ret <4 x i64> %shuffle
1051}
1052
; Mask <0,1,4,2> - selects a[0],a[1],b[0],a[2].
1053define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
1054; AVX1-LABEL: shuffle_v4i64_0142:
1055; AVX1:       # %bb.0:
1056; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
1057; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[2]
1058; AVX1-NEXT:    retq
1059;
1060; AVX2-LABEL: shuffle_v4i64_0142:
1061; AVX2:       # %bb.0:
1062; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
1063; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
1064; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
1065; AVX2-NEXT:    retq
1066;
1067; AVX512VL-LABEL: shuffle_v4i64_0142:
1068; AVX512VL:       # %bb.0:
1069; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,1,4,2]
1070; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1071; AVX512VL-NEXT:    retq
1072  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
1073  ret <4 x i64> %shuffle
1074}
1075
; Mask <0,4,1,2> - selects a[0],b[0],a[1],a[2].
1076define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
1077; AVX1-LABEL: shuffle_v4i64_0412:
1078; AVX1:       # %bb.0:
1079; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3]
1080; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1081; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
1082; AVX1-NEXT:    retq
1083;
1084; AVX2-LABEL: shuffle_v4i64_0412:
1085; AVX2:       # %bb.0:
1086; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1087; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
1088; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
1089; AVX2-NEXT:    retq
1090;
1091; AVX512VL-LABEL: shuffle_v4i64_0412:
1092; AVX512VL:       # %bb.0:
1093; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,1,2]
1094; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1095; AVX512VL-NEXT:    retq
1096  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
1097  ret <4 x i64> %shuffle
1098}
1099
; Mask <4,0,1,2> - selects b[0],a[0],a[1],a[2].
1100define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
1101; AVX1-LABEL: shuffle_v4i64_4012:
1102; AVX1:       # %bb.0:
1103; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
1104; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[2]
1105; AVX1-NEXT:    retq
1106;
1107; AVX2-LABEL: shuffle_v4i64_4012:
1108; AVX2:       # %bb.0:
1109; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2]
1110; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1111; AVX2-NEXT:    retq
1112;
1113; AVX512VL-SLOW-LABEL: shuffle_v4i64_4012:
1114; AVX512VL-SLOW:       # %bb.0:
1115; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2]
1116; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1117; AVX512VL-SLOW-NEXT:    retq
1118;
1119; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_4012:
1120; AVX512VL-FAST-ALL:       # %bb.0:
1121; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [4,0,1,2]
1122; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1123; AVX512VL-FAST-ALL-NEXT:    retq
1124;
1125; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_4012:
1126; AVX512VL-FAST-PERLANE:       # %bb.0:
1127; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2]
1128; AVX512VL-FAST-PERLANE-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1129; AVX512VL-FAST-PERLANE-NEXT:    retq
1130  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1131  ret <4 x i64> %shuffle
1132}
1133
; Mask <0,1,4,5> - low half of %a followed by low half of %b.
1134define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
1135; ALL-LABEL: shuffle_v4i64_0145:
1136; ALL:       # %bb.0:
1137; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1138; ALL-NEXT:    retq
1139  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1140  ret <4 x i64> %shuffle
1141}
1142
; Mask <0,4,5,1> - selects a[0],b[0],b[1],a[1].
1143define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
1144; AVX1-LABEL: shuffle_v4i64_0451:
1145; AVX1:       # %bb.0:
1146; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
1147; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1148; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1149; AVX1-NEXT:    retq
1150;
1151; AVX2-LABEL: shuffle_v4i64_0451:
1152; AVX2:       # %bb.0:
1153; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,3]
1154; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
1155; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
1156; AVX2-NEXT:    retq
1157;
1158; AVX512VL-LABEL: shuffle_v4i64_0451:
1159; AVX512VL:       # %bb.0:
1160; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,5,1]
1161; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1162; AVX512VL-NEXT:    retq
1163  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
1164  ret <4 x i64> %shuffle
1165}
1166
; Mask <4,5,0,1> - low half of %b followed by low half of %a.
1167define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
1168; ALL-LABEL: shuffle_v4i64_4501:
1169; ALL:       # %bb.0:
1170; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1171; ALL-NEXT:    retq
1172  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1173  ret <4 x i64> %shuffle
1174}
1175
; Mask <4,0,1,5> - selects b[0],a[0],a[1],b[1].
1176define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
1177; AVX1-LABEL: shuffle_v4i64_4015:
1178; AVX1:       # %bb.0:
1179; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
1180; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1181; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1182; AVX1-NEXT:    retq
1183;
1184; AVX2-LABEL: shuffle_v4i64_4015:
1185; AVX2:       # %bb.0:
1186; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,2,1]
1187; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
1188; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
1189; AVX2-NEXT:    retq
1190;
1191; AVX512VL-LABEL: shuffle_v4i64_4015:
1192; AVX512VL:       # %bb.0:
1193; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [4,0,1,5]
1194; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1195; AVX512VL-NEXT:    retq
1196  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
1197  ret <4 x i64> %shuffle
1198}
1199
; Mask <2,poison,3,5> - selects a[2], a don't-care lane, a[3], b[1]. The poison
; index leaves lane 1 unconstrained, which is the "u" in the test name.
1200define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
1201; AVX1-LABEL: shuffle_v4i64_2u35:
1202; AVX1:       # %bb.0:
1203; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1204; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
1205; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
1206; AVX1-NEXT:    retq
1207;
1208; AVX2-LABEL: shuffle_v4i64_2u35:
1209; AVX2:       # %bb.0:
1210; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1211; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1]
1212; AVX2-NEXT:    retq
1213;
1214; AVX512VL-SLOW-LABEL: shuffle_v4i64_2u35:
1215; AVX512VL-SLOW:       # %bb.0:
1216; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1217; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1]
1218; AVX512VL-SLOW-NEXT:    retq
1219;
1220; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_2u35:
1221; AVX512VL-FAST-ALL:       # %bb.0:
1222; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [2,5,3,5]
1223; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1224; AVX512VL-FAST-ALL-NEXT:    retq
1225;
1226; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_2u35:
1227; AVX512VL-FAST-PERLANE:       # %bb.0:
1228; AVX512VL-FAST-PERLANE-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1229; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1]
1230; AVX512VL-FAST-PERLANE-NEXT:    retq
1231  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 poison, i32 3, i32 5>
1232  ret <4 x i64> %shuffle
1233}
1234
; Mask <1,2,5,1> - selects a[1],a[2],b[1],a[1].
1235define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
1236; AVX1-LABEL: shuffle_v4i64_1251:
1237; AVX1:       # %bb.0:
1238; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
1239; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1240; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[3],ymm2[3]
1241; AVX1-NEXT:    retq
1242;
1243; AVX2-LABEL: shuffle_v4i64_1251:
1244; AVX2:       # %bb.0:
1245; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[1,1,1,1]
1246; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,2,1]
1247; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
1248; AVX2-NEXT:    retq
1249;
1250; AVX512VL-LABEL: shuffle_v4i64_1251:
1251; AVX512VL:       # %bb.0:
1252; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,2,5,1]
1253; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1254; AVX512VL-NEXT:    retq
1255  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
1256  ret <4 x i64> %shuffle
1257}
1258
; Mask <1,0,5,4> - low half of %a with its elements swapped, then low half of %b
; with its elements swapped.
1259define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
1260; AVX1-LABEL: shuffle_v4i64_1054:
1261; AVX1:       # %bb.0:
1262; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1263; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1264; AVX1-NEXT:    retq
1265;
1266; AVX2-LABEL: shuffle_v4i64_1054:
1267; AVX2:       # %bb.0:
1268; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1269; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1270; AVX2-NEXT:    retq
1271;
1272; AVX512VL-SLOW-LABEL: shuffle_v4i64_1054:
1273; AVX512VL-SLOW:       # %bb.0:
1274; AVX512VL-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1275; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1276; AVX512VL-SLOW-NEXT:    retq
1277;
1278; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_1054:
1279; AVX512VL-FAST-ALL:       # %bb.0:
1280; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,5,4]
1281; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1282; AVX512VL-FAST-ALL-NEXT:    retq
1283;
1284; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_1054:
1285; AVX512VL-FAST-PERLANE:       # %bb.0:
1286; AVX512VL-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1287; AVX512VL-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1288; AVX512VL-FAST-PERLANE-NEXT:    retq
1289  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
1290  ret <4 x i64> %shuffle
1291}
1292
; Mask <3,2,5,4> - high half of %a with its elements swapped, then low half of %b
; with its elements swapped.
1293define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
1294; AVX1-LABEL: shuffle_v4i64_3254:
1295; AVX1:       # %bb.0:
1296; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1297; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1298; AVX1-NEXT:    retq
1299;
1300; AVX2-LABEL: shuffle_v4i64_3254:
1301; AVX2:       # %bb.0:
1302; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1303; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
1304; AVX2-NEXT:    retq
1305;
1306; AVX512VL-SLOW-LABEL: shuffle_v4i64_3254:
1307; AVX512VL-SLOW:       # %bb.0:
1308; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1309; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
1310; AVX512VL-SLOW-NEXT:    retq
1311;
1312; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_3254:
1313; AVX512VL-FAST-ALL:       # %bb.0:
1314; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,5,4]
1315; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1316; AVX512VL-FAST-ALL-NEXT:    retq
1317;
1318; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_3254:
1319; AVX512VL-FAST-PERLANE:       # %bb.0:
1320; AVX512VL-FAST-PERLANE-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1321; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
1322; AVX512VL-FAST-PERLANE-NEXT:    retq
1323  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
1324  ret <4 x i64> %shuffle
1325}
1326
; Mask <3,2,7,6> - high half of %a with its elements swapped, then high half of %b
; with its elements swapped.
1327define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
1328; AVX1-LABEL: shuffle_v4i64_3276:
1329; AVX1:       # %bb.0:
1330; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1331; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1332; AVX1-NEXT:    retq
1333;
1334; AVX2-LABEL: shuffle_v4i64_3276:
1335; AVX2:       # %bb.0:
1336; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1337; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1338; AVX2-NEXT:    retq
1339;
1340; AVX512VL-SLOW-LABEL: shuffle_v4i64_3276:
1341; AVX512VL-SLOW:       # %bb.0:
1342; AVX512VL-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1343; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1344; AVX512VL-SLOW-NEXT:    retq
1345;
1346; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_3276:
1347; AVX512VL-FAST-ALL:       # %bb.0:
1348; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,7,6]
1349; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1350; AVX512VL-FAST-ALL-NEXT:    retq
1351;
1352; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_3276:
1353; AVX512VL-FAST-PERLANE:       # %bb.0:
1354; AVX512VL-FAST-PERLANE-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1355; AVX512VL-FAST-PERLANE-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1356; AVX512VL-FAST-PERLANE-NEXT:    retq
1357  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
1358  ret <4 x i64> %shuffle
1359}
1360
; Mask <1,0,7,6> - low half of %a with its elements swapped, then high half of %b
; with its elements swapped.
1361define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
1362; AVX1-LABEL: shuffle_v4i64_1076:
1363; AVX1:       # %bb.0:
1364; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1365; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1366; AVX1-NEXT:    retq
1367;
1368; AVX2-LABEL: shuffle_v4i64_1076:
1369; AVX2:       # %bb.0:
1370; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1371; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1372; AVX2-NEXT:    retq
1373;
1374; AVX512VL-SLOW-LABEL: shuffle_v4i64_1076:
1375; AVX512VL-SLOW:       # %bb.0:
1376; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1377; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1378; AVX512VL-SLOW-NEXT:    retq
1379;
1380; AVX512VL-FAST-LABEL: shuffle_v4i64_1076:
1381; AVX512VL-FAST:       # %bb.0:
1382; AVX512VL-FAST-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,7,6]
1383; AVX512VL-FAST-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1384; AVX512VL-FAST-NEXT:    retq
1385  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
1386  ret <4 x i64> %shuffle
1387}
1388
; Mask <0,4,1,5> - interleaves the low halves of %a and %b as a[0],b[0],a[1],b[1].
1389define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
1390; AVX1-LABEL: shuffle_v4i64_0415:
1391; AVX1:       # %bb.0:
1392; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
1393; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1394; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1395; AVX1-NEXT:    retq
1396;
1397; AVX2-LABEL: shuffle_v4i64_0415:
1398; AVX2:       # %bb.0:
1399; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
1400; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
1401; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1402; AVX2-NEXT:    retq
1403;
1404; AVX512VL-LABEL: shuffle_v4i64_0415:
1405; AVX512VL:       # %bb.0:
1406; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,1,5]
1407; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1408; AVX512VL-NEXT:    retq
1409  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1410  ret <4 x i64> %shuffle
1411}
1412
; Mask <2,7,4,1> - selects a[2],b[3],b[0],a[1].
1413define <4 x i64> @shuffle_v4i64_2741(<4 x i64> %a, <4 x i64> %b) {
1414; AVX1-LABEL: shuffle_v4i64_2741:
1415; AVX1:       # %bb.0:
1416; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[0,1]
1417; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1418; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
1419; AVX1-NEXT:    retq
1420;
1421; AVX2-LABEL: shuffle_v4i64_2741:
1422; AVX2:       # %bb.0:
1423; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
1424; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1425; AVX2-NEXT:    retq
1426;
1427; AVX512VL-SLOW-LABEL: shuffle_v4i64_2741:
1428; AVX512VL-SLOW:       # %bb.0:
1429; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
1430; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1431; AVX512VL-SLOW-NEXT:    retq
1432;
1433; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_2741:
1434; AVX512VL-FAST-ALL:       # %bb.0:
1435; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [2,7,4,1]
1436; AVX512VL-FAST-ALL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1437; AVX512VL-FAST-ALL-NEXT:    retq
1438;
1439; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_2741:
1440; AVX512VL-FAST-PERLANE:       # %bb.0:
1441; AVX512VL-FAST-PERLANE-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
1442; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
1443; AVX512VL-FAST-PERLANE-NEXT:    retq
1444  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 7, i32 4, i32 1>
1445  ret <4 x i64> %shuffle
1446}
1447
1448; PR59860
; Mask <0,4,3,7> - selects a[0],b[0],a[3],b[3].
1449define <4 x i64> @shuffle_v4i64_0437(<4 x i64> %a, <4 x i64> %b) {
1450; AVX1-LABEL: shuffle_v4i64_0437:
1451; AVX1:       # %bb.0:
1452; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
1453; AVX1-NEXT:    retq
1454;
1455; AVX2-LABEL: shuffle_v4i64_0437:
1456; AVX2:       # %bb.0:
1457; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,3]
1458; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,3,3]
1459; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1460; AVX2-NEXT:    retq
1461;
1462; AVX512VL-LABEL: shuffle_v4i64_0437:
1463; AVX512VL:       # %bb.0:
1464; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,3,7]
1465; AVX512VL-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
1466; AVX512VL-NEXT:    retq
1467  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
1468  ret <4 x i64> %shuffle
1469}
1470
; The first shuffle operand is zeroinitializer and the second is %a, so mask
; <0,4,0,6> yields zero, a[0], zero, a[2].
1471define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
1472; AVX1-LABEL: shuffle_v4i64_z4z6:
1473; AVX1:       # %bb.0:
1474; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1475; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1476; AVX1-NEXT:    retq
1477;
1478; AVX2-LABEL: shuffle_v4i64_z4z6:
1479; AVX2:       # %bb.0:
1480; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1481; AVX2-NEXT:    retq
1482;
1483; AVX512VL-LABEL: shuffle_v4i64_z4z6:
1484; AVX512VL:       # %bb.0:
1485; AVX512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1486; AVX512VL-NEXT:    retq
1487  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
1488  ret <4 x i64> %shuffle
1489}
1490
; The first shuffle operand is zeroinitializer and the second is %a, so mask
; <5,0,poison,0> yields a[1], zero, a don't-care lane, zero.
1491define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
1492; AVX1-LABEL: shuffle_v4i64_5zuz:
1493; AVX1:       # %bb.0:
1494; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1495; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
1496; AVX1-NEXT:    retq
1497;
1498; AVX2-LABEL: shuffle_v4i64_5zuz:
1499; AVX2:       # %bb.0:
1500; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1501; AVX2-NEXT:    retq
1502;
1503; AVX512VL-LABEL: shuffle_v4i64_5zuz:
1504; AVX512VL:       # %bb.0:
1505; AVX512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1506; AVX512VL-NEXT:    retq
1507  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 poison, i32 0>
1508  ret <4 x i64> %shuffle
1509}
1510
; Mask <4,0,poison,2> - selects b[0], a[0], a don't-care lane, a[2].
1511define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
1512; ALL-LABEL: shuffle_v4i64_40u2:
1513; ALL:       # %bb.0:
1514; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1515; ALL-NEXT:    retq
1516  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 poison, i32 2>
1517  ret <4 x i64> %shuffle
1518}
1519
; Mask <1,5,poison,poison> - only the low two lanes are constrained, to a[1],b[1].
1520define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
1521; ALL-LABEL: shuffle_v4i64_15uu:
1522; ALL:       # %bb.0:
1523; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1524; ALL-NEXT:    retq
1525  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
1526  ret <4 x i64> %shuffle
1527}
1528
; Mask <1,1,poison,poison>: duplicates %a[1] into the two defined low lanes; %b is
; not referenced by the mask. Expected lowering is one 128-bit vshufps [2,3,2,3].
1529define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
1530; ALL-LABEL: shuffle_v4i64_11uu:
1531; ALL:       # %bb.0:
1532; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
1533; ALL-NEXT:    retq
1534  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 poison, i32 poison>
1535  ret <4 x i64> %shuffle
1536}
1537
; Mask <2,2,poison,poison>: duplicates %a[2] (upper 128-bit half) into the two
; defined low lanes. AVX1 is expected to extract the upper half and vshufps it;
; AVX2 and AVX512VL are expected to use a single vpermpd [2,2,2,2].
1538define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
1539; AVX1-LABEL: shuffle_v4i64_22uu:
1540; AVX1:       # %bb.0:
1541; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1542; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
1543; AVX1-NEXT:    retq
1544;
1545; AVX2-LABEL: shuffle_v4i64_22uu:
1546; AVX2:       # %bb.0:
1547; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
1548; AVX2-NEXT:    retq
1549;
1550; AVX512VL-LABEL: shuffle_v4i64_22uu:
1551; AVX512VL:       # %bb.0:
1552; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
1553; AVX512VL-NEXT:    retq
1554  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 poison, i32 poison>
1555  ret <4 x i64> %shuffle
1556}
1557
; Splat of %a[3] to all four lanes (mask <3,3,3,3>). AVX1 is expected to need
; vperm2f128 + vshufpd; AVX2 and AVX512VL are expected to use one vpermpd [3,3,3,3].
1558define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
1559; AVX1-LABEL: shuffle_v4i64_3333:
1560; AVX1:       # %bb.0:
1561; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1562; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
1563; AVX1-NEXT:    retq
1564;
1565; AVX2-LABEL: shuffle_v4i64_3333:
1566; AVX2:       # %bb.0:
1567; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
1568; AVX2-NEXT:    retq
1569;
1570; AVX512VL-LABEL: shuffle_v4i64_3333:
1571; AVX512VL:       # %bb.0:
1572; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
1573; AVX512VL-NEXT:    retq
1574  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1575  ret <4 x i64> %shuffle
1576}
1577
; Mask <1,4,3,4> against the constant <0,poison,poison,poison>: lanes 0 and 2 take
; %a[1] and %a[3], lanes 1 and 3 take the constant zero element. %b is unused.
; Expected: vxorps + vunpckhpd on AVX1, a single vpsrldq on AVX2 and AVX512VL.
1578define <4 x i64> @shuffle_v4i64_1z3z(<4 x i64> %a, <4 x i64> %b) {
1579; AVX1-LABEL: shuffle_v4i64_1z3z:
1580; AVX1:       # %bb.0:
1581; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1582; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
1583; AVX1-NEXT:    retq
1584;
1585; AVX2-LABEL: shuffle_v4i64_1z3z:
1586; AVX2:       # %bb.0:
1587; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1588; AVX2-NEXT:    retq
1589;
1590; AVX512VL-LABEL: shuffle_v4i64_1z3z:
1591; AVX512VL:       # %bb.0:
1592; AVX512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1593; AVX512VL-NEXT:    retq
1594  %shuffle = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
1595  ret <4 x i64> %shuffle
1596}
1597
; Splats element 0 of each <2 x i64> input, then concatenates the two splats into
; four lanes (a0, a0, b0, b0). Expected: vinsertf128 to join the inputs followed by
; an in-lane duplicate (vmovddup on AVX1, vshufps on AVX2 and AVX512VL).
1598define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) {
1599; AVX1-LABEL: shuffle_v4i64_0044_v2i64:
1600; AVX1:       # %bb.0:
1601; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1602; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1603; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1604; AVX1-NEXT:    retq
1605;
1606; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
1607; AVX2:       # %bb.0:
1608; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1609; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1610; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1611; AVX2-NEXT:    retq
1612;
1613; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
1614; AVX512VL:       # %bb.0:
1615; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1616; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1617; AVX512VL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1618; AVX512VL-NEXT:    retq
1619  %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
1620  %2 = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
1621  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1622  ret <4 x i64> %3
1623}
1624
; Swaps the two elements of each <2 x i64> input, then concatenates the results
; (a1, a0, b1, b0). Every run line is expected to emit vinsertf128 followed by an
; in-lane vshufps [2,3,0,1,6,7,4,5].
1625define <4 x i64> @shuffle_v4i64_1032_v2i64(<2 x i64> %a, <2 x i64> %b) {
1626; ALL-LABEL: shuffle_v4i64_1032_v2i64:
1627; ALL:       # %bb.0:
1628; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1629; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1630; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1631; ALL-NEXT:    retq
1632  %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
1633  %2 = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
1634  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1635  ret <4 x i64> %3
1636}
1637
; Chain of four dependent shuffles of %b with partially poison masks; %a is unused.
; The only assertion is that a retq is emitted, so the exact instruction sequence is
; deliberately left unchecked (presumably a crash/robustness reproducer - confirm
; against the commit history before tightening the checks).
1638define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
1639; ALL-LABEL: stress_test1:
1640; ALL:         retq
1641  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
1642  %d = shufflevector <4 x i64> %c, <4 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 2, i32 poison>
1643  %e = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 1, i32 poison>
1644  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
1645
1646  ret <4 x i64> %f
1647}
1648
; Inserts the scalar argument into lane 0 and takes lanes 1-3 from zeroinitializer
; (mask <0,5,6,7>). Expected to fold to a single vmovq from %rdi into xmm0.
1649define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
1650; ALL-LABEL: insert_reg_and_zero_v4i64:
1651; ALL:       # %bb.0:
1652; ALL-NEXT:    vmovq %rdi, %xmm0
1653; ALL-NEXT:    retq
1654  %v = insertelement <4 x i64> poison, i64 %a, i64 0
1655  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1656  ret <4 x i64> %shuffle
1657}
1658
; Memory variant of the insert-and-zero pattern: the i64 is loaded from %ptr, placed
; in lane 0, and lanes 1-3 come from zeroinitializer. Expected to fold the load into
; a single vmovsd (mem[0],zero).
1659define <4 x i64> @insert_mem_and_zero_v4i64(ptr %ptr) {
1660; ALL-LABEL: insert_mem_and_zero_v4i64:
1661; ALL:       # %bb.0:
1662; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1663; ALL-NEXT:    retq
1664  %a = load i64, ptr %ptr
1665  %v = insertelement <4 x i64> poison, i64 %a, i64 0
1666  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1667  ret <4 x i64> %shuffle
1668}
1669
; Inserts the double argument into lane 0 and takes lanes 1-3 from zeroinitializer
; (mask <0,5,6,7>). Expected lowering is a single vmovq that keeps xmm0[0] and
; zeroes the rest.
1670define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
1671; ALL-LABEL: insert_reg_and_zero_v4f64:
1672; ALL:       # %bb.0:
1673; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
1674; ALL-NEXT:    retq
1675  %v = insertelement <4 x double> poison, double %a, i32 0
1676  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1677  ret <4 x double> %shuffle
1678}
1679
; Loads a double from %ptr into lane 0 and takes lanes 1-3 from zeroinitializer.
; Expected to fold the load into a single vmovsd (mem[0],zero).
1680define <4 x double> @insert_mem_and_zero_v4f64(ptr %ptr) {
1681; ALL-LABEL: insert_mem_and_zero_v4f64:
1682; ALL:       # %bb.0:
1683; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1684; ALL-NEXT:    retq
1685  %a = load double, ptr %ptr
1686  %v = insertelement <4 x double> poison, double %a, i32 0
1687  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1688  ret <4 x double> %shuffle
1689}
1690
; Loads one double and splats it to all four lanes (insertelement + mask <0,0,0,0>).
; Expected to fold into a single vbroadcastsd from memory on every run line.
1691define <4 x double> @splat_mem_v4f64(ptr %ptr) {
1692; ALL-LABEL: splat_mem_v4f64:
1693; ALL:       # %bb.0:
1694; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1695; ALL-NEXT:    retq
1696  %a = load double, ptr %ptr
1697  %v = insertelement <4 x double> poison, double %a, i32 0
1698  %shuffle = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1699  ret <4 x double> %shuffle
1700}
1701
; Integer counterpart of the scalar-load splat: loads one i64 and splats it to all
; four lanes. The checks expect the floating-point vbroadcastsd from memory here too.
1702define <4 x i64> @splat_mem_v4i64(ptr %ptr) {
1703; ALL-LABEL: splat_mem_v4i64:
1704; ALL:       # %bb.0:
1705; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1706; ALL-NEXT:    retq
1707  %a = load i64, ptr %ptr
1708  %v = insertelement <4 x i64> poison, i64 %a, i64 0
1709  %shuffle = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1710  ret <4 x i64> %shuffle
1711}
1712
; Same scalar-load splat, but the scalar is first inserted into a <2 x double> and
; the shuffle widens it to four lanes with an all-zero mask. Still expected to fold
; into a single vbroadcastsd from memory.
1713define <4 x double> @splat_mem_v4f64_2(ptr %p) {
1714; ALL-LABEL: splat_mem_v4f64_2:
1715; ALL:       # %bb.0:
1716; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1717; ALL-NEXT:    retq
1718  %1 = load double, ptr %p
1719  %2 = insertelement <2 x double> poison, double %1, i32 0
1720  %3 = shufflevector <2 x double> %2, <2 x double> poison, <4 x i32> zeroinitializer
1721  ret <4 x double> %3
1722}
1723
; Splats element 0 of a <2 x double> register argument to four lanes. AVX1 is
; expected to use vmovddup + vinsertf128; AVX2 and AVX512VL are expected to use a
; register-source vbroadcastsd.
1724define <4 x double> @splat_v4f64(<2 x double> %r) {
1725; AVX1-LABEL: splat_v4f64:
1726; AVX1:       # %bb.0:
1727; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1728; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1729; AVX1-NEXT:    retq
1730;
1731; AVX2-LABEL: splat_v4f64:
1732; AVX2:       # %bb.0:
1733; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
1734; AVX2-NEXT:    retq
1735;
1736; AVX512VL-LABEL: splat_v4f64:
1737; AVX512VL:       # %bb.0:
1738; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
1739; AVX512VL-NEXT:    retq
1740  %1 = shufflevector <2 x double> %r, <2 x double> poison, <4 x i32> zeroinitializer
1741  ret <4 x double> %1
1742}
1743
; Loads a full <2 x i64> but only splats its element 0 to four lanes. The checks
; expect the vector load to be narrowed into a single vbroadcastsd from memory.
1744define <4 x i64> @splat_mem_v4i64_from_v2i64(ptr %ptr) {
1745; ALL-LABEL: splat_mem_v4i64_from_v2i64:
1746; ALL:       # %bb.0:
1747; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1748; ALL-NEXT:    retq
1749  %v = load <2 x i64>, ptr %ptr
1750  %shuffle = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1751  ret <4 x i64> %shuffle
1752}
1753
; Loads a full <2 x double> but only splats its element 0 to four lanes. The checks
; expect the vector load to be narrowed into a single vbroadcastsd from memory.
1754define <4 x double> @splat_mem_v4f64_from_v2f64(ptr %ptr) {
1755; ALL-LABEL: splat_mem_v4f64_from_v2f64:
1756; ALL:       # %bb.0:
1757; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1758; ALL-NEXT:    retq
1759  %v = load <2 x double>, ptr %ptr
1760  %shuffle = shufflevector <2 x double> %v, <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1761  ret <4 x double> %shuffle
1762}
1763
; Repeats a loaded <2 x i64> in both 128-bit halves (mask <0,1,0,1>). The AVX1 and
; AVX2 run lines expect vbroadcastf128 from memory, while the AVX512VL run lines
; expect the integer-domain vbroadcasti128.
1764define <4 x i64> @splat128_mem_v4i64_from_v2i64(ptr %ptr) {
1765; AVX1OR2-LABEL: splat128_mem_v4i64_from_v2i64:
1766; AVX1OR2:       # %bb.0:
1767; AVX1OR2-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
1768; AVX1OR2-NEXT:    retq
1769;
1770; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
1771; AVX512VL:       # %bb.0:
1772; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
1773; AVX512VL-NEXT:    retq
1774  %v = load <2 x i64>, ptr %ptr
1775  %shuffle = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1776  ret <4 x i64> %shuffle
1777}
1778
; Repeats a loaded <2 x double> in both 128-bit halves (mask <0,1,0,1>). Every run
; line is expected to fold this into one vbroadcastf128 from memory.
1779define <4 x double> @splat128_mem_v4f64_from_v2f64(ptr %ptr) {
1780; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
1781; ALL:       # %bb.0:
1782; ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
1783; ALL-NEXT:    retq
1784  %v = load <2 x double>, ptr %ptr
1785  %shuffle = shufflevector <2 x double> %v, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1786  ret <4 x double> %shuffle
1787}
1788
; Widens a <2 x i64> to four lanes (mask indices 2 and 3 select the poison operand),
; bitcasts the result to <4 x double> and splats lane 0. Checks that the splat is
; still recognised through the widening shuffle and the bitcast: AVX1 expects
; vshufps + vinsertf128, AVX2 and AVX512VL expect a single vbroadcastsd.
1789define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
1790; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
1791; AVX1:       # %bb.0:
1792; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
1793; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1794; AVX1-NEXT:    retq
1795;
1796; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64:
1797; AVX2:       # %bb.0:
1798; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
1799; AVX2-NEXT:    retq
1800;
1801; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64:
1802; AVX512VL:       # %bb.0:
1803; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
1804; AVX512VL-NEXT:    retq
1805  %1 = shufflevector <2 x i64> %a0, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1806  %2 = bitcast <4 x i64> %1 to <4 x double>
1807  %3 = shufflevector <4 x double> %2, <4 x double> poison, <4 x i32> zeroinitializer
1808  ret <4 x double> %3
1809}
1810
1811; PR114959
; Loads two <2 x double> vectors (at %src and at %src + 32 bytes, both align 1) and
; interleaves them with mask <0,2,1,3> (lo0, hi0, lo1, hi1). Expected lowering on
; every run line is two vbroadcastf128 loads combined by one vshufpd.
1812define <4 x double> @concat_v4f64_0213_broadcasts(ptr %src) {
1813; ALL-LABEL: concat_v4f64_0213_broadcasts:
1814; ALL:       # %bb.0:
1815; ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
1816; ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
1817; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
1818; ALL-NEXT:    retq
1819  %src.hi = getelementptr inbounds i8, ptr %src, i64 32
1820  %lo = load <2 x double>, ptr %src, align 1
1821  %hi = load <2 x double>, ptr %src.hi, align 1
1822  %shuffle = shufflevector <2 x double> %lo, <2 x double> %hi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1823  ret <4 x double> %shuffle
1824}
1825
; Three shuffles separated by bitcasts that together reduce to the 64-bit mask
; <0,4,2,6>: the first shuffle yields (b0,a0,b2,a2); the float shuffle reverses the
; four 32-bit elements of each 128-bit lane; the i16 shuffle swaps adjacent 16-bit
; pairs, which restores each 32-bit element. Expected to fold to one vunpcklpd.
1826define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
1827; ALL-LABEL: bitcast_v4f64_0426:
1828; ALL:       # %bb.0:
1829; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1830; ALL-NEXT:    retq
1831  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
1832  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
1833  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1834  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
1835  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
1836  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
1837  ret <4 x double> %bitcast64
1838}
1839
; Extracts the low half of %a0 (elements 0,1) and the high half of %a1 (elements
; 6,7) as <2 x i64> values and concatenates them. Expected to collapse into a single
; vblendps of the two 256-bit inputs.
1840define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
1841; ALL-LABEL: concat_v4i64_0167:
1842; ALL:       # %bb.0:
1843; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1844; ALL-NEXT:    retq
1845  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
1846  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
1847  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1848  ret <4 x i64> %shuffle64
1849}
1850
; Extracts the low halves of %a0 and %a1, bitcasts each to <4 x i32>, concatenates
; them as <8 x i32> and bitcasts back to <4 x i64>. Expected to see through the
; bitcasts and emit a single vinsertf128.
1851define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
1852; ALL-LABEL: concat_v4i64_0145_bc:
1853; ALL:       # %bb.0:
1854; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1855; ALL-NEXT:    retq
1856  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
1857  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
1858  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
1859  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
1860  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1861  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
1862  ret <4 x i64> %shuffle64
1863}
1864
; Loads an i64 with align 1, inserts it into lane 0 of a <2 x i64> and splats it to
; four lanes with an all-zero mask. Expected to fold into one vbroadcastsd from memory.
1865define <4 x i64> @insert_dup_mem_v4i64(ptr %ptr) {
1866; ALL-LABEL: insert_dup_mem_v4i64:
1867; ALL:       # %bb.0:
1868; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
1869; ALL-NEXT:    retq
1870  %tmp = load i64, ptr %ptr, align 1
1871  %tmp1 = insertelement <2 x i64> poison, i64 %tmp, i32 0
1872  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <4 x i32> zeroinitializer
1873  ret <4 x i64> %tmp2
1874}
1875
; Mask <1,2,3,4>: a one-element rotation across the concatenation of %a and %b
; (a1, a2, a3, b0). AVX1 expects vperm2f128 + vshufpd, AVX2 expects vblendps +
; vpermpd, and AVX512VL expects a single valignq.
1876define <4 x i64> @shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b) {
1877; AVX1-LABEL: shuffle_v4i64_1234:
1878; AVX1:       # %bb.0:
1879; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1]
1880; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
1881; AVX1-NEXT:    retq
1882;
1883; AVX2-LABEL: shuffle_v4i64_1234:
1884; AVX2:       # %bb.0:
1885; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1886; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
1887; AVX2-NEXT:    retq
1888;
1889; AVX512VL-LABEL: shuffle_v4i64_1234:
1890; AVX512VL:       # %bb.0:
1891; AVX512VL-NEXT:    valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
1892; AVX512VL-NEXT:    retq
1893  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
1894  ret <4 x i64> %shuffle
1895}
1896
; Single-input rotation by one element (mask <1,2,3,0>). AVX1 expects vperm2f128 +
; vshufpd; AVX2 and AVX512VL expect a single vpermpd [1,2,3,0].
1897define <4 x i64> @shuffle_v4i64_1230(<4 x i64> %a) {
1898; AVX1-LABEL: shuffle_v4i64_1230:
1899; AVX1:       # %bb.0:
1900; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1901; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
1902; AVX1-NEXT:    retq
1903;
1904; AVX2-LABEL: shuffle_v4i64_1230:
1905; AVX2:       # %bb.0:
1906; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
1907; AVX2-NEXT:    retq
1908;
1909; AVX512VL-LABEL: shuffle_v4i64_1230:
1910; AVX512VL:       # %bb.0:
1911; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
1912; AVX512VL-NEXT:    retq
1913  %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
1914  ret <4 x i64> %shuffle
1915}
1916
; Mask <4,0,4,3> against the constant <0,poison,poison,poison>: lanes 0 and 2 are
; zero, lanes 1 and 3 take %a[0] and %a[3]. %b is unused. The expected lowering
; differs by tuning: AVX1 uses vxorpd + vshufpd, the -SLOW prefixes use vpermpd +
; vxorps + vblendps, and the -FAST prefixes use a single vpshufb with zeroing.
1917define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
1918; AVX1-LABEL: shuffle_v4i64_z0z3:
1919; AVX1:       # %bb.0:
1920; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1921; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
1922; AVX1-NEXT:    retq
1923;
1924; AVX2-SLOW-LABEL: shuffle_v4i64_z0z3:
1925; AVX2-SLOW:       # %bb.0:
1926; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
1927; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1928; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1929; AVX2-SLOW-NEXT:    retq
1930;
1931; AVX2-FAST-LABEL: shuffle_v4i64_z0z3:
1932; AVX2-FAST:       # %bb.0:
1933; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
1934; AVX2-FAST-NEXT:    retq
1935;
1936; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
1937; AVX512VL-SLOW:       # %bb.0:
1938; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
1939; AVX512VL-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1940; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1941; AVX512VL-SLOW-NEXT:    retq
1942;
1943; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
1944; AVX512VL-FAST:       # %bb.0:
1945; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
1946; AVX512VL-FAST-NEXT:    retq
1947  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
1948  ret <4 x i64> %1
1949}
1950
; Mask <1,4,2,4> against the constant <0,poison,poison,poison>: lanes 0 and 2 take
; %a[1] and %a[2], lanes 1 and 3 are zero. %b is unused. The expected lowering
; differs by tuning: AVX1 uses vxorpd + vshufpd, the -SLOW prefixes use vxorps +
; vblendps + vpermpd, and the -FAST prefixes use a single vpshufb with zeroing.
1951define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
1952; AVX1-LABEL: shuffle_v4i64_1z2z:
1953; AVX1:       # %bb.0:
1954; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1955; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[3]
1956; AVX1-NEXT:    retq
1957;
1958; AVX2-SLOW-LABEL: shuffle_v4i64_1z2z:
1959; AVX2-SLOW:       # %bb.0:
1960; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1961; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1962; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
1963; AVX2-SLOW-NEXT:    retq
1964;
1965; AVX2-FAST-LABEL: shuffle_v4i64_1z2z:
1966; AVX2-FAST:       # %bb.0:
1967; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
1968; AVX2-FAST-NEXT:    retq
1969;
1970; AVX512VL-SLOW-LABEL: shuffle_v4i64_1z2z:
1971; AVX512VL-SLOW:       # %bb.0:
1972; AVX512VL-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1973; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
1974; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
1975; AVX512VL-SLOW-NEXT:    retq
1976;
1977; AVX512VL-FAST-LABEL: shuffle_v4i64_1z2z:
1978; AVX512VL-FAST:       # %bb.0:
1979; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
1980; AVX512VL-FAST-NEXT:    retq
1981  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
1982  ret <4 x i64> %1
1983}
1984
; Adds the even elements (mask <0,2,4,6>) to the odd elements (mask <1,3,5,7>) of
; the pair (%a, %b), i.e. a pairwise horizontal add in source order. Expected to be
; matched to vhaddpd: AVX1 first rearranges the 128-bit halves (vperm2f128 +
; vinsertf128), AVX2 and AVX512VL fix the lane order afterwards with vpermpd [0,2,1,3].
1985define <4 x double> @add_v4f64_0246_1357(<4 x double> %a, <4 x double> %b) {
1986; AVX1-LABEL: add_v4f64_0246_1357:
1987; AVX1:       # %bb.0: # %entry
1988; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
1989; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1990; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm0
1991; AVX1-NEXT:    retq
1992;
1993; AVX2-LABEL: add_v4f64_0246_1357:
1994; AVX2:       # %bb.0: # %entry
1995; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
1996; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
1997; AVX2-NEXT:    retq
1998;
1999; AVX512VL-LABEL: add_v4f64_0246_1357:
2000; AVX512VL:       # %bb.0: # %entry
2001; AVX512VL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2002; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
2003; AVX512VL-NEXT:    retq
2004entry:
2005  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
2006  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
2007  %add = fadd <4 x double> %shuffle, %shuffle1
2008  ret <4 x double> %add
2009}
2010
; Pairwise horizontal add with the inputs in swapped order: even mask <4,6,0,2>
; plus odd mask <5,7,1,3>, so the %b sums come first. Expected to be matched to
; vhaddpd: AVX1 rearranges the 128-bit halves first, AVX2 and AVX512VL permute the
; result afterwards with vpermpd [1,3,0,2].
2011define <4 x double> @add_v4f64_4602_5713(<4 x double> %a, <4 x double> %b) {
2012; AVX1-LABEL: add_v4f64_4602_5713:
2013; AVX1:       # %bb.0: # %entry
2014; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
2015; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2016; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm0
2017; AVX1-NEXT:    retq
2018;
2019; AVX2-LABEL: add_v4f64_4602_5713:
2020; AVX2:       # %bb.0: # %entry
2021; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2022; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
2023; AVX2-NEXT:    retq
2024;
2025; AVX512VL-LABEL: add_v4f64_4602_5713:
2026; AVX512VL:       # %bb.0: # %entry
2027; AVX512VL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2028; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
2029; AVX512VL-NEXT:    retq
2030entry:
2031  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
2032  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
2033  %add = fadd <4 x double> %shuffle, %shuffle1
2034  ret <4 x double> %add
2035}
2036
; Horizontal add with a poison lane followed by a reversing shuffle: the sum vector
; is (a0+a1, a2+a3, b0+b1, poison) and the final mask <poison,2,1,0> reverses it.
; Expected to still be matched to vhaddpd: AVX1 uses vinsertf128 + vhaddpd, AVX2 and
; AVX512VL use vhaddpd followed by vpermpd [0,0,3,1].
2037define <4 x double> @add_v4f64_024u_135u_reverse(<4 x double> %a, <4 x double> %b) {
2038; AVX1-LABEL: add_v4f64_024u_135u_reverse:
2039; AVX1:       # %bb.0:
2040; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
2041; AVX1-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2042; AVX1-NEXT:    retq
2043;
2044; AVX2-LABEL: add_v4f64_024u_135u_reverse:
2045; AVX2:       # %bb.0:
2046; AVX2-NEXT:    vhaddpd %ymm0, %ymm1, %ymm0
2047; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,1]
2048; AVX2-NEXT:    retq
2049;
2050; AVX512VL-LABEL: add_v4f64_024u_135u_reverse:
2051; AVX512VL:       # %bb.0:
2052; AVX512VL-NEXT:    vhaddpd %ymm0, %ymm1, %ymm0
2053; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,1]
2054; AVX512VL-NEXT:    retq
2055  %shuffle0 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 poison>
2056  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 poison>
2057  %add = fadd <4 x double> %shuffle0, %shuffle1
2058  %shuffle = shufflevector <4 x double> %add, <4 x double> poison, <4 x i32> <i32 poison, i32 2, i32 1, i32 0>
2059  ret <4 x double> %shuffle
2060}
2061
; Integer version of the even/odd pairwise add (masks <0,2,4,6> and <1,3,5,7>).
; None of the expected sequences uses a horizontal-add instruction: the two
; shuffles are materialised and then added with vpaddq. AVX1 splits the add into two
; 128-bit vpaddq; AVX2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE use unpack + vpermq;
; AVX512VL-FAST-ALL uses two vpermi2q with constant index vectors.
2062define <4 x i64> @add_v4i64_0246_1357(<4 x i64> %a, <4 x i64> %b) {
2063; AVX1-LABEL: add_v4i64_0246_1357:
2064; AVX1:       # %bb.0: # %entry
2065; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
2066; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2067; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
2068; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
2069; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2070; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2071; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
2072; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
2073; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2074; AVX1-NEXT:    retq
2075;
2076; AVX2-LABEL: add_v4i64_0246_1357:
2077; AVX2:       # %bb.0: # %entry
2078; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2079; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2080; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2081; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2082; AVX2-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2083; AVX2-NEXT:    retq
2084;
2085; AVX512VL-SLOW-LABEL: add_v4i64_0246_1357:
2086; AVX512VL-SLOW:       # %bb.0: # %entry
2087; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2088; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2089; AVX512VL-SLOW-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2090; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2091; AVX512VL-SLOW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2092; AVX512VL-SLOW-NEXT:    retq
2093;
2094; AVX512VL-FAST-ALL-LABEL: add_v4i64_0246_1357:
2095; AVX512VL-FAST-ALL:       # %bb.0: # %entry
2096; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,2,4,6]
2097; AVX512VL-FAST-ALL-NEXT:    vpermi2q %ymm1, %ymm0, %ymm2
2098; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,5,7]
2099; AVX512VL-FAST-ALL-NEXT:    vpermi2q %ymm1, %ymm0, %ymm3
2100; AVX512VL-FAST-ALL-NEXT:    vpaddq %ymm3, %ymm2, %ymm0
2101; AVX512VL-FAST-ALL-NEXT:    retq
2102;
2103; AVX512VL-FAST-PERLANE-LABEL: add_v4i64_0246_1357:
2104; AVX512VL-FAST-PERLANE:       # %bb.0: # %entry
2105; AVX512VL-FAST-PERLANE-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2106; AVX512VL-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2107; AVX512VL-FAST-PERLANE-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2108; AVX512VL-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2109; AVX512VL-FAST-PERLANE-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2110; AVX512VL-FAST-PERLANE-NEXT:    retq
2111entry:
2112  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
2113  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
2114  %add = add <4 x i64> %shuffle, %shuffle1
2115  ret <4 x i64> %add
2116}
2117
; Integer even/odd pairwise add with the inputs in swapped order (masks <4,6,0,2>
; and <5,7,1,3>). The expected sequences mirror the unswapped integer test with
; ymm0 and ymm1 exchanged as shuffle sources: unpack + permute + vpaddq, or two
; vpermi2q for AVX512VL-FAST-ALL. No horizontal-add instruction is expected.
2118define <4 x i64> @add_v4i64_4602_5713(<4 x i64> %a, <4 x i64> %b) {
2119; AVX1-LABEL: add_v4i64_4602_5713:
2120; AVX1:       # %bb.0: # %entry
2121; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
2122; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2123; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
2124; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
2125; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2126; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2127; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
2128; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
2129; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2130; AVX1-NEXT:    retq
2131;
2132; AVX2-LABEL: add_v4i64_4602_5713:
2133; AVX2:       # %bb.0: # %entry
2134; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
2135; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2136; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
2137; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2138; AVX2-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2139; AVX2-NEXT:    retq
2140;
2141; AVX512VL-SLOW-LABEL: add_v4i64_4602_5713:
2142; AVX512VL-SLOW:       # %bb.0: # %entry
2143; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
2144; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2145; AVX512VL-SLOW-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
2146; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2147; AVX512VL-SLOW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2148; AVX512VL-SLOW-NEXT:    retq
2149;
2150; AVX512VL-FAST-ALL-LABEL: add_v4i64_4602_5713:
2151; AVX512VL-FAST-ALL:       # %bb.0: # %entry
2152; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,2,4,6]
2153; AVX512VL-FAST-ALL-NEXT:    vpermi2q %ymm0, %ymm1, %ymm2
2154; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,5,7]
2155; AVX512VL-FAST-ALL-NEXT:    vpermi2q %ymm0, %ymm1, %ymm3
2156; AVX512VL-FAST-ALL-NEXT:    vpaddq %ymm3, %ymm2, %ymm0
2157; AVX512VL-FAST-ALL-NEXT:    retq
2158;
2159; AVX512VL-FAST-PERLANE-LABEL: add_v4i64_4602_5713:
2160; AVX512VL-FAST-PERLANE:       # %bb.0: # %entry
2161; AVX512VL-FAST-PERLANE-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
2162; AVX512VL-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2163; AVX512VL-FAST-PERLANE-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
2164; AVX512VL-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2165; AVX512VL-FAST-PERLANE-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
2166; AVX512VL-FAST-PERLANE-NEXT:    retq
2167entry:
2168  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
2169  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
2170  %add = add <4 x i64> %shuffle, %shuffle1
2171  ret <4 x i64> %add
2172}
2173
; Keeps lane 0 of %a and zeroes lanes 1-3 (mask <0,5,6,7> against zeroinitializer)
; in a function marked optsize. Expected lowering is a single vmovq.
2174define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
2175; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
2176; ALL:       # %bb.0:
2177; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2178; ALL-NEXT:    retq
2179  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2180  ret <4 x double> %b
2181}
2182
; Integer version of the optsize keep-lane-0 test: lane 0 of %a is kept, lanes 1-3
; are zero. Expected lowering is a single vmovq.
2183define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
2184; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
2185; ALL:       # %bb.0:
2186; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2187; ALL-NEXT:    retq
2188  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2189  ret <4 x i64> %b
2190}
2191
; Eight-lane float version under optsize: lane 0 of %a is kept and lanes 1-7 are
; zero. Every run line is expected to emit vxorps + vmovss (not a blend).
2192define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
2193; ALL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
2194; ALL:       # %bb.0:
2195; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2196; ALL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2197; ALL-NEXT:    retq
2198  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2199  ret <8 x float> %b
2200}
2201
; Eight-lane i32 version under optsize: lane 0 of %a is kept and lanes 1-7 are
; zero. Every run line is expected to emit vxorps + vmovss (not a blend).
2202define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
2203; ALL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
2204; ALL:       # %bb.0:
2205; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2206; ALL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2207; ALL-NEXT:    retq
2208  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2209  ret <8 x i32> %b
2210}
2211
; Keep-lane-0 test driven by !prof !14 instead of the optsize attribute. Expected
; lowering is a single vmovq, as in the optsize double variant.
; NOTE(review): !14 is defined outside this view; presumably a function entry count
; that enables profile-guided size optimisation - confirm against the metadata.
2212define <4 x double> @shuffle_v4f64_0zzz_pgso(<4 x double> %a) !prof !14 {
2213; ALL-LABEL: shuffle_v4f64_0zzz_pgso:
2214; ALL:       # %bb.0:
2215; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2216; ALL-NEXT:    retq
2217  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2218  ret <4 x double> %b
2219}
2220
; Integer keep-lane-0 test driven by !prof !14 instead of the optsize attribute.
; Expected lowering is a single vmovq.
; NOTE(review): !14 is defined outside this view; presumably a function entry count
; that enables profile-guided size optimisation - confirm against the metadata.
2221define <4 x i64> @shuffle_v4i64_0zzz_pgso(<4 x i64> %a) !prof !14 {
2222; ALL-LABEL: shuffle_v4i64_0zzz_pgso:
2223; ALL:       # %bb.0:
2224; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2225; ALL-NEXT:    retq
2226  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
2227  ret <4 x i64> %b
2228}
2229
; Eight-lane float keep-lane-0 test with !prof !14. Unlike the optsize variant, the
; AVX1 and AVX2 run lines expect vxorps + vblendps here; only the AVX512VL run lines
; expect vxorps + vmovss.
; NOTE(review): !14 is defined outside this view; presumably a function entry count
; that enables profile-guided size optimisation - confirm against the metadata.
2230define <8 x float> @shuffle_v8f32_0zzzzzzz_pgso(<8 x float> %a) !prof !14 {
2231; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
2232; AVX1OR2:       # %bb.0:
2233; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2234; AVX1OR2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2235; AVX1OR2-NEXT:    retq
2236;
2237; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
2238; AVX512VL:       # %bb.0:
2239; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2240; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2241; AVX512VL-NEXT:    retq
2242  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2243  ret <8 x float> %b
2244}
2245
; <8 x i32> "element 0 plus zeros" shuffle on a function tagged with
; !prof !14 (function_entry_count 0, defined at the end of this file).
; Mask index 0 keeps element 0 of %a; indices 9-15 select elements 1-7 of the
; zeroinitializer operand.
; Expected codegen differs by feature set: the AVX1/AVX2 runs expect a
; vblendps against a zeroed register, while the AVX512VL runs expect vmovss.
2246define <8 x i32> @shuffle_v8i32_0zzzzzzz_pgso(<8 x i32> %a) !prof !14 {
2247; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
2248; AVX1OR2:       # %bb.0:
2249; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2250; AVX1OR2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2251; AVX1OR2-NEXT:    retq
2252;
2253; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
2254; AVX512VL:       # %bb.0:
2255; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2256; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2257; AVX512VL-NEXT:    retq
2258  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2259  ret <8 x i32> %b
2260}
2261
; Two-input <4 x i64> shuffle whose upper two result elements are poison.
; Result element 0 is %x[1]; result element 1 is %y[3] (mask index 7 = 4 + 3).
; Expected codegen for every RUN line: extract the upper 128 bits of ymm1
; (which hold %y[2..3]) and combine the high elements with a 128-bit vunpckhpd.
2262define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
2263; ALL-LABEL: unpckh_v4i64:
2264; ALL:       # %bb.0:
2265; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm1
2266; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2267; ALL-NEXT:    retq
2268  %unpckh = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 1, i32 7, i32 poison, i32 poison>
2269  ret <4 x i64> %unpckh
2270}
2271
; Two-input <4 x double> shuffle whose upper two result elements are poison.
; Result element 0 is %x[1]; result element 1 is %y[3] (mask index 7 = 4 + 3).
; Expected codegen for every RUN line: extract the upper 128 bits of ymm1
; (which hold %y[2..3]) and combine the high elements with a 128-bit vunpckhpd.
2272define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
2273; ALL-LABEL: unpckh_v4f64:
2274; ALL:       # %bb.0:
2275; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm1
2276; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2277; ALL-NEXT:    retq
2278  %unpckh = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 7, i32 poison, i32 poison>
2279  ret <4 x double> %unpckh
2280}
2281
; Module-level profile data used by the *_pgso functions in this file.
; !0 registers a "ProfileSummary" module flag whose payload is !1; !2-!9 are
; its named fields and !10-!13 the entries of "DetailedSummary".
; NOTE(review): the three integers in each of !11-!13 are presumably
; (cutoff, min count, number of counts) per the LLVM profile summary format;
; confirm against the LLVM documentation.
; !14 gives a function entry count of 0 and is attached via !prof to every
; *_pgso function in this file.
2282!llvm.module.flags = !{!0}
2283!0 = !{i32 1, !"ProfileSummary", !1}
2284!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
2285!2 = !{!"ProfileFormat", !"InstrProf"}
2286!3 = !{!"TotalCount", i64 10000}
2287!4 = !{!"MaxCount", i64 10}
2288!5 = !{!"MaxInternalCount", i64 1}
2289!6 = !{!"MaxFunctionCount", i64 1000}
2290!7 = !{!"NumCounts", i64 3}
2291!8 = !{!"NumFunctions", i64 3}
2292!9 = !{!"DetailedSummary", !10}
2293!10 = !{!11, !12, !13}
2294!11 = !{i32 10000, i64 100, i32 1}
2295!12 = !{i32 999000, i64 100, i32 1}
2296!13 = !{i32 999999, i64 1, i32 2}
2297!14 = !{!"function_entry_count", i64 0}
2298