; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX512VBMI

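; This file covers lowering of <64 x i8> shufflevector patterns for 512-bit
; vectors across the AVX-512 feature subsets selected above: AVX512F and
; AVX512DQ lack 512-bit byte shuffles and typically split into 256-bit halves,
; AVX512BW provides 512-bit byte/word operations, and AVX512VBMI adds vpermb
; and vpermt2b for arbitrary cross-lane byte permutes.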
define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a)  {
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $16, %xmm0, %xmm0
; ALL-NEXT:    retq
  %b = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <64 x i8> %b
}

define <64 x i8> @shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 64, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 64, i32 64, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 64, i32 64, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 64>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
  ret <64 x i8> %shuffle
}


define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512DQ-NEXT:    vandps %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovzxbq {{.*#+}} xmm1 = [255,0]
; AVX512VBMI-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512F-NEXT:    # ymm1 = mem[0,1,0,1]
; AVX512F-NEXT:    vpshufb %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48]
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    # ymm1 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i8> %shuffle
}

; PR44379
define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57:
; ALL:       # %bb.0:
; ALL-NEXT:    vprolq $48, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 40, i32 41, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 56, i32 57>
  ret <64 x i8> %shuffle
}

; PR54658
define <64 x i8> @shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,22,23]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm4 = xmm3[u,u,u,u,u,u,u,u,1,3,2,5,7,6,9,11]
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512F-NEXT:    vpermt2d %zmm4, %zmm1, %zmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm3[2,3,4,5,6,7]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[10,13,15,14,1,3,2,5,u,u,u,u,u,u,u,u,26,29,31,30,17,19,18,21,23,22,25,27,u,u,u,u]
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,5,6,4,1,1,1,1]
; AVX512F-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u,33,35,34,37,39,38,41,43,42,45,47,46,u,u,u,u,49,51,50,53,55,54,57,59,58,61,63,62,u,u,u,u]
; AVX512BW-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,1,2,4,5,6,8,9,10,12,13,14,0,0,0,0]
; AVX512BW-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,22,23]
; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm4 = xmm3[u,u,u,u,u,u,u,u,1,3,2,5,7,6,9,11]
; AVX512DQ-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpermt2d %zmm4, %zmm1, %zmm2
; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm3[2,3,4,5,6,7]
; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[10,13,15,14,1,3,2,5,u,u,u,u,u,u,u,u,26,29,31,30,17,19,18,21,23,22,25,27,u,u,u,u]
; AVX512DQ-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,5,6,4,1,1,1,1]
; AVX512DQ-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,3,2,5,7,6,9,11,10,13,15,14,17,19,18,21,23,22,25,27,26,29,31,30,33,35,34,37,39,38,41,43,42,45,47,46,49,51,50,53,55,54,57,59,58,61,63,62,1,3,2,5,1,3,2,5,1,3,2,5,1,3,2,5]
; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 1, i32 3, i32 2, i32 5, i32 7, i32 6, i32 9, i32 11, i32 10, i32 13, i32 15, i32 14, i32 17, i32 19, i32 18, i32 21, i32 23, i32 22, i32 25, i32 27, i32 26, i32 29, i32 31, i32 30, i32 33, i32 35, i32 34, i32 37, i32 39, i32 38, i32 41, i32 43, i32 42, i32 45, i32 47, i32 46, i32 49, i32 51, i32 50, i32 53, i32 55, i32 54, i32 57, i32 59, i32 58, i32 61, i32 63, i32 62, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5>
  ret <64 x i8> %shuffle
}

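; The insert_dup tests splat a scalar loaded from memory; when the selected
; byte is directly addressable, the expected lowering folds the load into a
; vpbroadcastb from the right offset instead of materializing the
; <4 x i32> insertelement sequence.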
define <64 x i8> @insert_dup_mem_v64i8_i32(ptr %ptr) {
; AVX512F-LABEL: insert_dup_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, ptr %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> zeroinitializer
  ret <64 x i8> %tmp3
}

define <64 x i8> @insert_dup_mem_v64i8_sext_i8(ptr %ptr) {
; AVX512F-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i8, ptr %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <64 x i32> zeroinitializer
  ret <64 x i8> %tmp4
}

define <64 x i8> @insert_dup_elt1_mem_v64i8_i32(ptr %ptr) {
; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb 1(%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb 1(%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, ptr %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <64 x i8> %tmp3
}

define <64 x i8> @insert_dup_elt3_mem_v64i8_i32(ptr %ptr) {
; AVX512F-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb 3(%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb 3(%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb 3(%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb 3(%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, ptr %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <64 x i8> %tmp3
}

define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(ptr %ptr) {
; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movsbl (%rdi), %eax
; AVX512F-NEXT:    shrl $8, %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movsbl (%rdi), %eax
; AVX512BW-NEXT:    shrl $8, %eax
; AVX512BW-NEXT:    vpbroadcastb %eax, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    movsbl (%rdi), %eax
; AVX512DQ-NEXT:    shrl $8, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm0
; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    movsbl (%rdi), %eax
; AVX512VBMI-NEXT:    shrl $8, %eax
; AVX512VBMI-NEXT:    vpbroadcastb %eax, %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i8, ptr %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <64 x i8> %tmp4
}

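; The tests below shuffle %a against zeroinitializer. Patterns with evenly
; spaced zero lanes lower to zero extensions (vpmovzxbq, vpmovzxbd, vpmovzxbw);
; the reversed and two-source byte interleaves further down need vpshufb
; sequences, or a single vpermb/vpermt2b with AVX512VBMI.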
define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; ALL-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 72, i32 0, i32 0, i32 0, i32 73, i32 0, i32 0, i32 0, i32 74, i32 0, i32 0, i32 0, i32 75, i32 0, i32 0, i32 0, i32 76, i32 0, i32 0, i32 0, i32 77, i32 0, i32 0, i32 0, i32 78, i32 0, i32 0, i32 0, i32 79, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 65, i32 0, i32 66, i32 0, i32 67, i32 0, i32 68, i32 0, i32 69, i32 0, i32 70, i32 0, i32 71, i32 0, i32 72, i32 0, i32 73, i32 0, i32 74, i32 0, i32 75, i32 0, i32 76, i32 0, i32 77, i32 0, i32 78, i32 0, i32 79, i32 0, i32 80, i32 0, i32 81, i32 0, i32 82, i32 0, i32 83, i32 0, i32 84, i32 0, i32 85, i32 0, i32 86, i32 0, i32 87, i32 0, i32 88, i32 0, i32 89, i32 0, i32 90, i32 0, i32 91, i32 0, i32 92, i32 0, i32 93, i32 0, i32 94, i32 0, i32 95, i32 0>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128]
; AVX512F-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX512F-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128]
; AVX512DQ-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [63,65,61,67,59,69,57,71,55,73,53,75,51,77,49,79,47,81,45,83,43,85,41,87,39,89,37,91,35,93,33,95,31,97,29,99,27,101,25,103,23,105,21,107,19,109,17,111,15,113,13,115,11,117,9,119,7,121,5,123,3,125,1,127]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 63, i32 64, i32 61, i32 64, i32 59, i32 64, i32 57, i32 64, i32 55, i32 64, i32 53, i32 64, i32 51, i32 64, i32 49, i32 64, i32 47, i32 64, i32 45, i32 64, i32 43, i32 64, i32 41, i32 64, i32 39, i32 64, i32 37, i32 64, i32 35, i32 64, i32 33, i32 64, i32 31, i32 64, i32 29, i32 64, i32 27, i32 64, i32 25, i32 64, i32 23, i32 64, i32 21, i32 64, i32 19, i32 64, i32 17, i32 64, i32 15, i32 64, i32 13, i32 64, i32 11, i32 64, i32 9, i32 64, i32 7, i32 64, i32 5, i32 64, i32 3, i32 64, i32 1, i32 64>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512F-NEXT:    # ymm3 = mem[0,1,0,1]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero
; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512DQ-NEXT:    # ymm3 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [63,64,61,66,59,68,57,70,55,72,53,74,51,76,49,78,47,80,45,82,43,84,41,86,39,88,37,90,35,92,33,94,31,96,29,98,27,100,25,102,23,104,21,106,19,108,17,110,15,112,13,114,11,116,9,118,7,120,5,122,3,124,1,126]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 63, i32 64, i32 61, i32 66, i32 59, i32 68, i32 57, i32 70, i32 55, i32 72, i32 53, i32 74, i32 51, i32 76, i32 49, i32 78, i32 47, i32 80, i32 45, i32 82, i32 43, i32 84, i32 41, i32 86, i32 39, i32 88, i32 37, i32 90, i32 35, i32 92, i32 33, i32 94, i32 31, i32 96, i32 29, i32 98, i32 27, i32 100, i32 25, i32 102, i32 23, i32 104, i32 21, i32 106, i32 19, i32 108, i32 17, i32 110, i32 15, i32 112, i32 13, i32 114, i32 11, i32 116, i32 9, i32 118, i32 7, i32 120, i32 5, i32 122, i32 3, i32 124, i32 1, i32 126>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u]
; AVX512F-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u]
; AVX512F-NEXT:    vpshufb %xmm4, %xmm0, %xmm5
; AVX512F-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0]
; AVX512F-NEXT:    vpblendvb %ymm5, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13]
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
; AVX512F-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15]
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
; AVX512F-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX512F-NEXT:    vinserti32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u]
; AVX512BW-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u]
; AVX512BW-NEXT:    vpshufb %xmm4, %xmm0, %xmm5
; AVX512BW-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0]
; AVX512BW-NEXT:    vpblendvb %ymm5, %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13]
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15]
; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u]
; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u]
; AVX512DQ-NEXT:    vpshufb %xmm4, %xmm0, %xmm5
; AVX512DQ-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0]
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13]
; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpor %xmm2, %xmm5, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15]
; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX512DQ-NEXT:    vinserti32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,7,9,13,15,19,21,25,27,31,33,37,39,43,45,49,51,55,57,61,63,67,69,73,75,79,81,85,87,91,93,97,99,103,105,109,111,115,117,121,123,127,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 1, i32 3, i32 7, i32 9, i32 13, i32 15, i32 19, i32 21, i32 25, i32 27, i32 31, i32 33, i32 37, i32 39, i32 43, i32 45, i32 49, i32 51, i32 55, i32 57, i32 61, i32 63, i32 67, i32 69, i32 73, i32 75, i32 79, i32 81, i32 85, i32 87, i32 91, i32 93, i32 97, i32 99, i32 103, i32 105, i32 109, i32 111, i32 115, i32 117, i32 121, i32 123, i32 127, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <64 x i8> %r
}

define <64 x i8> @shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u]
; AVX512F-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u]
; AVX512F-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
; AVX512F-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX512F-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX512F-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
; AVX512F-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
; AVX512F-NEXT:    vpor %xmm3, %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
; AVX512F-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u]
; AVX512BW-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u]
; AVX512BW-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
; AVX512BW-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX512BW-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX512BW-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
; AVX512BW-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
; AVX512BW-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpor %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512BW-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u]
; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u]
; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpor %xmm4, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
; AVX512DQ-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
715; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
716; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
717; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
718; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
719; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
720; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
721; AVX512DQ-NEXT:    vpor %xmm3, %xmm1, %xmm1
722; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
723; AVX512DQ-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
724; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
725; AVX512DQ-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
726; AVX512DQ-NEXT:    retq
727;
728; AVX512VBMI-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
729; AVX512VBMI:       # %bb.0:
730; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [65,69,71,75,77,81,83,87,89,93,95,99,101,105,107,111,113,117,119,123,125,1,3,7,9,13,15,19,21,25,27,31,33,37,39,43,45,49,51,55,57,61,63,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
731; AVX512VBMI-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2
732; AVX512VBMI-NEXT:    vmovdqa64 %zmm2, %zmm0
733; AVX512VBMI-NEXT:    retq
734  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 1, i32 5, i32 7, i32 11, i32 13, i32 17, i32 19, i32 23, i32 25, i32 29, i32 31, i32 35, i32 37, i32 41, i32 43, i32 47, i32 49, i32 53, i32 55, i32 59, i32 61, i32 65, i32 67, i32 71, i32 73, i32 77, i32 79, i32 83, i32 85, i32 89, i32 91, i32 95, i32 97, i32 101, i32 103, i32 107, i32 109, i32 113, i32 115, i32 119, i32 121, i32 125, i32 127, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
735  ret <64 x i8> %r
736}
737
738define <64 x i8> @shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) {
739; AVX512F-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
740; AVX512F:       # %bb.0:
741; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
742; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
743; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u]
744; AVX512F-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
745; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u]
746; AVX512F-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
747; AVX512F-NEXT:    vpor %xmm3, %xmm2, %xmm2
748; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
749; AVX512F-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
750; AVX512F-NEXT:    vpshufb %xmm5, %xmm0, %xmm4
751; AVX512F-NEXT:    vpor %xmm3, %xmm4, %xmm3
752; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
753; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
754; AVX512F-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7]
755; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
756; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
757; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
758; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14]
759; AVX512F-NEXT:    vpor %xmm3, %xmm1, %xmm1
760; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
761; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
762; AVX512F-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
763; AVX512F-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
764; AVX512F-NEXT:    retq
765;
766; AVX512BW-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
767; AVX512BW:       # %bb.0:
768; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
769; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm3
770; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u]
771; AVX512BW-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
772; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u]
773; AVX512BW-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
774; AVX512BW-NEXT:    vpor %xmm3, %xmm2, %xmm2
775; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm3
776; AVX512BW-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
777; AVX512BW-NEXT:    vpshufb %xmm5, %xmm0, %xmm4
778; AVX512BW-NEXT:    vpor %xmm3, %xmm4, %xmm3
779; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
780; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
781; AVX512BW-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7]
782; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
783; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
784; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm1
785; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14]
786; AVX512BW-NEXT:    vpor %xmm3, %xmm1, %xmm1
787; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
788; AVX512BW-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
789; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
790; AVX512BW-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
791; AVX512BW-NEXT:    retq
792;
793; AVX512DQ-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
794; AVX512DQ:       # %bb.0:
795; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
796; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
797; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u]
798; AVX512DQ-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
799; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u]
800; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
801; AVX512DQ-NEXT:    vpor %xmm3, %xmm2, %xmm2
802; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
803; AVX512DQ-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
804; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm0, %xmm4
805; AVX512DQ-NEXT:    vpor %xmm3, %xmm4, %xmm3
806; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
807; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
808; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7]
809; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
810; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
811; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
812; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14]
813; AVX512DQ-NEXT:    vpor %xmm3, %xmm1, %xmm1
814; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
815; AVX512DQ-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
816; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
817; AVX512DQ-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
818; AVX512DQ-NEXT:    retq
819;
820; AVX512VBMI-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
821; AVX512VBMI:       # %bb.0:
822; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,4,8,10,14,16,20,22,26,28,32,34,38,40,44,46,50,52,56,58,62,64,68,70,74,76,80,82,86,88,92,94,98,100,104,106,110,112,116,118,122,124,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
823; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
824; AVX512VBMI-NEXT:    retq
825  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 2, i32 4, i32 8, i32 10, i32 14, i32 16, i32 20, i32 22, i32 26, i32 28, i32 32, i32 34, i32 38, i32 40, i32 44, i32 46, i32 50, i32 52, i32 56, i32 58, i32 62, i32 64, i32 68, i32 70, i32 74, i32 76, i32 80, i32 82, i32 86, i32 88, i32 92, i32 94, i32 98, i32 100, i32 104, i32 106, i32 110, i32 112, i32 116, i32 118, i32 122, i32 124, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
826  ret <64 x i8> %r
827}
828
829define <64 x i8> @shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) {
830; AVX512F-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
831; AVX512F:       # %bb.0:
832; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
833; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u]
834; AVX512F-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
835; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
836; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u]
837; AVX512F-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
838; AVX512F-NEXT:    vpor %xmm4, %xmm2, %xmm2
839; AVX512F-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
840; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
841; AVX512F-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
842; AVX512F-NEXT:    vpor %xmm3, %xmm4, %xmm3
843; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
844; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
845; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
846; AVX512F-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
847; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
848; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
849; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
850; AVX512F-NEXT:    vpor %xmm3, %xmm1, %xmm1
851; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
852; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
853; AVX512F-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
854; AVX512F-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
855; AVX512F-NEXT:    retq
856;
857; AVX512BW-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
858; AVX512BW:       # %bb.0:
859; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
860; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u]
861; AVX512BW-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
862; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm2
863; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u]
864; AVX512BW-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
865; AVX512BW-NEXT:    vpor %xmm4, %xmm2, %xmm2
866; AVX512BW-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
867; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm4
868; AVX512BW-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
869; AVX512BW-NEXT:    vpor %xmm3, %xmm4, %xmm3
870; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
871; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
872; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
873; AVX512BW-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
874; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
875; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
876; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
877; AVX512BW-NEXT:    vpor %xmm3, %xmm1, %xmm1
878; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
879; AVX512BW-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
880; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
881; AVX512BW-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
882; AVX512BW-NEXT:    retq
883;
884; AVX512DQ-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
885; AVX512DQ:       # %bb.0:
886; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
887; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u]
888; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
889; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm2
890; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u]
891; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
892; AVX512DQ-NEXT:    vpor %xmm4, %xmm2, %xmm2
893; AVX512DQ-NEXT:    vpshufb %xmm3, %xmm0, %xmm3
894; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm4
895; AVX512DQ-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
896; AVX512DQ-NEXT:    vpor %xmm3, %xmm4, %xmm3
897; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
898; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
899; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0]
900; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
901; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
902; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
903; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
904; AVX512DQ-NEXT:    vpor %xmm3, %xmm1, %xmm1
905; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
906; AVX512DQ-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0]
907; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm0, %ymm1, %ymm0
908; AVX512DQ-NEXT:    vinserti32x4 $2, %xmm2, %zmm0, %zmm0
909; AVX512DQ-NEXT:    retq
910;
911; AVX512VBMI-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
912; AVX512VBMI:       # %bb.0:
913; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [64,68,70,74,76,80,82,86,88,92,94,98,100,104,106,110,112,116,118,122,124,0,2,6,8,12,14,18,20,24,26,30,32,36,38,42,44,48,50,54,56,60,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
914; AVX512VBMI-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2
915; AVX512VBMI-NEXT:    vmovdqa64 %zmm2, %zmm0
916; AVX512VBMI-NEXT:    retq
917  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 4, i32 6, i32 10, i32 12, i32 16, i32 18, i32 22, i32 24, i32 28, i32 30, i32 34, i32 36, i32 40, i32 42, i32 46, i32 48, i32 52, i32 54, i32 58, i32 60, i32 64, i32 66, i32 70, i32 72, i32 76, i32 78, i32 82, i32 84, i32 88, i32 90, i32 94, i32 96, i32 100, i32 102, i32 106, i32 108, i32 112, i32 114, i32 118, i32 120, i32 124, i32 126, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
918  ret <64 x i8> %r
919}
920
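; The tests below keep the low bytes of %a0 and append strided bytes of %a1;
; without VBMI this requires vpshufb/vpor chains plus variable blends.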
921define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125(<64 x i8> %a0, <64 x i8> %a1) {
922; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125:
923; AVX512F:       # %bb.0:
924; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
925; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
926; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13]
927; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
928; AVX512F-NEXT:    vpor %xmm3, %xmm2, %xmm2
929; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
930; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
931; AVX512F-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15]
932; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
933; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
934; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
935; AVX512F-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
936; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
937; AVX512F-NEXT:    retq
938;
939; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125:
940; AVX512BW:       # %bb.0:
941; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
942; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm3
943; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13]
944; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
945; AVX512BW-NEXT:    vpor %xmm3, %xmm2, %xmm2
946; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
947; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
948; AVX512BW-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15]
949; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
950; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
951; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
952; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
953; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
954; AVX512BW-NEXT:    retq
955;
956; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125:
957; AVX512DQ:       # %bb.0:
958; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
959; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
960; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13]
961; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero
962; AVX512DQ-NEXT:    vpor %xmm3, %xmm2, %xmm2
963; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
964; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u]
965; AVX512DQ-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15]
966; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
967; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
968; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
969; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
970; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
971; AVX512DQ-NEXT:    retq
972;
973; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125:
974; AVX512VBMI:       # %bb.0:
975; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,65,69,71,75,77,81,83,87,89,93,95,99,101,105,107,111,113,117,119,123,125]
976; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
977; AVX512VBMI-NEXT:    retq
978  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 65, i32 69, i32 71, i32 75, i32 77, i32 81, i32 83, i32 87, i32 89, i32 93, i32 95, i32 99, i32 101, i32 105, i32 107, i32 111, i32 113, i32 117, i32 119, i32 123, i32 125>
979  ret <64 x i8> %r
980}
981
982define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127(<64 x i8> %a0, <64 x i8> %a1) {
983; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127:
984; AVX512F:       # %bb.0:
985; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
986; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
987; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
988; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
989; AVX512F-NEXT:    vpor %xmm3, %xmm2, %xmm2
990; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
991; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
992; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
993; AVX512F-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
994; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
995; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
996; AVX512F-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
997; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
998; AVX512F-NEXT:    retq
999;
1000; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127:
1001; AVX512BW:       # %bb.0:
1002; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1003; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm3
1004; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
1005; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
1006; AVX512BW-NEXT:    vpor %xmm3, %xmm2, %xmm2
1007; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1008; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
1009; AVX512BW-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
1010; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
1011; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1012; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
1013; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1014; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1015; AVX512BW-NEXT:    retq
1016;
1017; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127:
1018; AVX512DQ:       # %bb.0:
1019; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1020; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
1021; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15]
1022; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero
1023; AVX512DQ-NEXT:    vpor %xmm3, %xmm2, %xmm2
1024; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1025; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u]
1026; AVX512DQ-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
1027; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
1028; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1029; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
1030; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1031; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1032; AVX512DQ-NEXT:    retq
1033;
1034; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127:
1035; AVX512VBMI:       # %bb.0:
1036; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,67,69,73,75,79,81,85,87,91,93,97,99,103,105,109,111,115,117,121,123,127]
1037; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
1038; AVX512VBMI-NEXT:    retq
1039  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 67, i32 69, i32 73, i32 75, i32 79, i32 81, i32 85, i32 87, i32 91, i32 93, i32 97, i32 99, i32 103, i32 105, i32 109, i32 111, i32 115, i32 117, i32 121, i32 123, i32 127>
1040  ret <64 x i8> %r
1041}
1042
1043define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126(<64 x i8> %a0, <64 x i8> %a1) {
1044; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126:
1045; AVX512F:       # %bb.0:
1046; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1047; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
1048; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
1049; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14]
1050; AVX512F-NEXT:    vpor %xmm3, %xmm2, %xmm2
1051; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1052; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
1053; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0]
1054; AVX512F-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1055; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1056; AVX512F-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7]
1057; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1058; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1059; AVX512F-NEXT:    retq
1060;
1061; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126:
1062; AVX512BW:       # %bb.0:
1063; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1064; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
1065; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm2
1066; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14]
1067; AVX512BW-NEXT:    vpor %xmm3, %xmm2, %xmm2
1068; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1069; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
1070; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0]
1071; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1072; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1073; AVX512BW-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7]
1074; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1075; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1076; AVX512BW-NEXT:    retq
1077;
1078; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126:
1079; AVX512DQ:       # %bb.0:
1080; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1081; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero
1082; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm2
1083; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14]
1084; AVX512DQ-NEXT:    vpor %xmm3, %xmm2, %xmm2
1085; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1086; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u]
1087; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0]
1088; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1089; AVX512DQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1090; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7]
1091; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1092; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1093; AVX512DQ-NEXT:    retq
1094;
1095; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126:
1096; AVX512VBMI:       # %bb.0:
1097; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,64,66,70,72,76,78,82,84,88,90,94,96,100,102,106,108,112,114,118,120,124,126]
1098; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
1099; AVX512VBMI-NEXT:    retq
1100  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 64, i32 66, i32 70, i32 72, i32 76, i32 78, i32 82, i32 84, i32 88, i32 90, i32 94, i32 96, i32 100, i32 102, i32 106, i32 108, i32 112, i32 114, i32 118, i32 120, i32 124, i32 126>
1101  ret <64 x i8> %r
1102}
1103
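; The rotate-style tests below shift bytes across the full 512-bit
; concatenation of the two operands; AVX512BW/AVX512VBMI lower them to
; valignq+vpalignr, while AVX512F/AVX512DQ split into 256-bit halves.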
1104define <64 x i8> @shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124(<64 x i8> %a0, <64 x i8> %a1) {
1105; AVX512F-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1106; AVX512F:       # %bb.0:
1107; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1108; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1109; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
1110; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1111; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1112; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
1113; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1114; AVX512F-NEXT:    retq
1115;
1116; AVX512BW-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1117; AVX512BW:       # %bb.0:
1118; AVX512BW-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1119; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
1120; AVX512BW-NEXT:    retq
1121;
1122; AVX512DQ-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1123; AVX512DQ:       # %bb.0:
1124; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1125; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1126; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
1127; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1128; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1129; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
1130; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1131; AVX512DQ-NEXT:    retq
1132;
1133; AVX512VBMI-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1134; AVX512VBMI:       # %bb.0:
1135; AVX512VBMI-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1136; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
1137; AVX512VBMI-NEXT:    retq
1138  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124>
1139  ret <64 x i8> %r
1140}
1141
1142; PR79799
1143define <64 x i8> @shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125(<64 x i8> %a0, <64 x i8> %a1) {
1144; AVX512F-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1145; AVX512F:       # %bb.0:
1146; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1147; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1148; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1149; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1150; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1151; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1152; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1153; AVX512F-NEXT:    retq
1154;
1155; AVX512BW-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1156; AVX512BW:       # %bb.0:
1157; AVX512BW-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1158; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zmm1[30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29],zmm1[46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45],zmm1[62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61]
1159; AVX512BW-NEXT:    retq
1160;
1161; AVX512DQ-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1162; AVX512DQ:       # %bb.0:
1163; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1164; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1165; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1166; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1167; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1168; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1169; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1170; AVX512DQ-NEXT:    retq
1171;
1172; AVX512VBMI-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1173; AVX512VBMI:       # %bb.0:
1174; AVX512VBMI-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1175; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zmm1[30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29],zmm1[46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45],zmm1[62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61]
1176; AVX512VBMI-NEXT:    retq
1177  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125>
1178  ret <64 x i8> %r
1179}
1180
1181define <64 x i8> @shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126(<64 x i8> %a0, <64 x i8> %a1) {
1182; AVX512F-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1183; AVX512F:       # %bb.0:
1184; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1185; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1186; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1187; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1188; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1189; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1190; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1191; AVX512F-NEXT:    retq
1192;
1193; AVX512BW-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1194; AVX512BW:       # %bb.0:
1195; AVX512BW-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1196; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
1197; AVX512BW-NEXT:    retq
1198;
1199; AVX512DQ-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1200; AVX512DQ:       # %bb.0:
1201; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1202; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1203; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1204; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1205; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1206; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1207; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1208; AVX512DQ-NEXT:    retq
1209;
1210; AVX512VBMI-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1211; AVX512VBMI:       # %bb.0:
1212; AVX512VBMI-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1213; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
1214; AVX512VBMI-NEXT:    retq
1215  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
1216  ret <64 x i8> %r
1217}
1218
1219define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126(<64 x i8> %a0, <64 x i8> %a1) {
1220; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
1221; AVX512F:       # %bb.0:
1222; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1223; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
1224; AVX512F-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
1225; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
1226; AVX512F-NEXT:    vpor %xmm3, %xmm2, %xmm2
1227; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1228; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
1229; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
1230; AVX512F-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
1231; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1232; AVX512F-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
1233; AVX512F-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1234; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1235; AVX512F-NEXT:    retq
1236;
1237; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
1238; AVX512BW:       # %bb.0:
1239; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1240; AVX512BW-NEXT:    vextracti128 $1, %ymm2, %xmm3
1241; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
1242; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
1243; AVX512BW-NEXT:    vpor %xmm3, %xmm2, %xmm2
1244; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1245; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
1246; AVX512BW-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
1247; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
1248; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1249; AVX512BW-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
1250; AVX512BW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1251; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1252; AVX512BW-NEXT:    retq
1253;
1254; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
1255; AVX512DQ:       # %bb.0:
1256; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1257; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
1258; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14]
1259; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero
1260; AVX512DQ-NEXT:    vpor %xmm3, %xmm2, %xmm2
1261; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
1262; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u]
1263; AVX512DQ-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295]
1264; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
1265; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
1266; AVX512DQ-NEXT:    vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615]
1267; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1268; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1269; AVX512DQ-NEXT:    retq
1270;
1271; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
1272; AVX512VBMI:       # %bb.0:
1273; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,66,68,72,74,78,80,84,86,90,92,96,98,102,104,108,110,114,116,120,122,126]
1274; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
1275; AVX512VBMI-NEXT:    retq
1276  %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 66, i32 68, i32 72, i32 74, i32 78, i32 80, i32 84, i32 86, i32 90, i32 92, i32 96, i32 98, i32 102, i32 104, i32 108, i32 110, i32 114, i32 116, i32 120, i32 122, i32 126>
1277  ret <64 x i8> %r
1278}
1279
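; The tests below check that ashr+bitcast+shuffle patterns are matched to
; vpackssdw/vpacksswb.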
define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpackssdw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpackssdw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  1, i32  4, i32  5, i32  8, i32  9, i32 12, i32 13, i32  64, i32  65, i32  68, i32  69, i32  72, i32  73, i32  76, i32  77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32  80, i32  81, i32  84, i32  85, i32  88, i32  89, i32  92, i32  93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32  96, i32  97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125>
  ret <64 x i8> %5
}

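; One step further: the ashr-by-25 results also fit in i8, so the byte shuffle folds to vpackssdw followed by vpacksswb. Each 16-byte group of the mask is repeated because vpacksswb is fed the same operand twice.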
define <64 x i8> @shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpacksswb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpacksswb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpacksswb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpacksswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpacksswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpacksswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

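; Logical-shift variant: lshr by 25 leaves each i32 in [0,127], so the word truncation is a lossless unsigned pack (vpackusdw).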
define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpackusdw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  1, i32  4, i32  5, i32  8, i32  9, i32 12, i32 13, i32  64, i32  65, i32  68, i32  69, i32  72, i32  73, i32  76, i32  77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32  80, i32  81, i32  84, i32  85, i32  88, i32  89, i32  92, i32  93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32  96, i32  97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125>
  ret <64 x i8> %5
}

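; As above, the lshr-by-25 results also fit in an unsigned byte, so the byte shuffle folds to vpackusdw followed by vpackuswb.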
define <64 x i8> @shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpackuswb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpackuswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

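; Selecting the even bytes of words shifted right by 8 extracts each word's high byte; this lowers to vpsrlw plus vpackuswb. Because the mask concatenates the two sources rather than interleaving them, a qword permute is needed to undo the pack's 128-bit lane interleaving.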
define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
; AVX512BW-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}

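; Here the mask interleaves 128-bit groups from the two sources, which matches vpackuswb's native lane behavior, so no permute fixup is needed.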
define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm3
; AVX512F-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}

; PR113396
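; Splat of byte 1 of an <8 x i8>: vpsrlw $8 moves byte 1 into byte 0, then vpbroadcastb splats it across the 512-bit result.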
define <64 x i8> @shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01(<8 x i8> %0) {
; AVX512F-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX512BW-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX512VBMI-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %s = shufflevector <8 x i8> %0, <8 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <64 x i8> %s
}

; PR114001
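; Each of the 8 source bytes is repeated 8 times; AVX512VBMI does this with a single vpermb, AVX512BW with vpbroadcastq feeding one wide vpshufb.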
define <64 x i8> @shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07(<8 x i8> %a0) {
; AVX512F-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastq %xmm0, %zmm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7]
; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT:    retq
  %s = shufflevector <8 x i8> %a0, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <64 x i8> %s
}

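; PR54562: two chained shuffles (a spread of the first 48 bytes across 16-byte groups with poison gaps, then a fixed per-group byte pattern) combine into a single vpermb on AVX512VBMI.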
define <64 x i8> @PR54562_ref(<64 x i8> %a0) {
; AVX512F-LABEL: PR54562_ref:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
; AVX512F-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: PR54562_ref:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,1,1,2,3,4,4,5]
; AVX512BW-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: PR54562_ref:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
; AVX512DQ-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: PR54562_ref:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46]
; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle1 = shufflevector <64 x i8> %a0, <64 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 poison, i32 poison, i32 poison, i32 poison, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 poison, i32 poison, i32 poison, i32 poison>
  %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32> <i32 1,  i32 0,  i32 2,  i32 1,  i32 4,  i32 3,  i32 5,  i32 4, i32 7,  i32 6,  i32 8,  i32 7,  i32 10, i32 9,  i32 11, i32 10, i32 17, i32 16, i32 18, i32 17, i32 20, i32 19, i32 21, i32 20, i32 23, i32 22, i32 24, i32 23, i32 26, i32 25, i32 27, i32 26, i32 33, i32 32, i32 34, i32 33, i32 36, i32 35, i32 37, i32 36, i32 39, i32 38, i32 40, i32 39, i32 42, i32 41, i32 43, i32 42, i32 49, i32 48, i32 50, i32 49, i32 52, i32 51, i32 53, i32 52, i32 55, i32 54, i32 56, i32 55, i32 58, i32 57, i32 59, i32 58>
  ret <64 x i8> %shuffle2
}

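; Memory form of the pattern above: with AVX512VBMI the load folds into vpermb and the shuffled result is stored directly.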
define void @PR54562_mem(ptr %src, ptr %dst) {
; AVX512F-LABEL: PR54562_mem:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm0
; AVX512F-NEXT:    vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
; AVX512F-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm2 = mem[0,1,1,2]
; AVX512F-NEXT:    vpshufb %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vmovdqa %ymm1, (%rsi)
; AVX512F-NEXT:    vmovdqa %ymm0, 32(%rsi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: PR54562_mem:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm0 = [0,1,1,2,3,4,4,5]
; AVX512BW-NEXT:    vpermq (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62]
; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rsi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: PR54562_mem:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa 32(%rdi), %xmm0
; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX512DQ-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm2 = mem[0,1,1,2]
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT:    vmovdqa %ymm1, (%rsi)
; AVX512DQ-NEXT:    vmovdqa %ymm0, 32(%rsi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: PR54562_mem:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46]
; AVX512VBMI-NEXT:    vpermb (%rdi), %zmm0, %zmm0
; AVX512VBMI-NEXT:    vmovdqa64 %zmm0, (%rsi)
; AVX512VBMI-NEXT:    vzeroupper
; AVX512VBMI-NEXT:    retq
  %load = load <64 x i8>, ptr %src, align 512
  %shuffle1 = shufflevector <64 x i8> %load, <64 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 poison, i32 poison, i32 poison, i32 poison, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 poison, i32 poison, i32 poison, i32 poison>
  %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32> <i32 1,  i32 0,  i32 2,  i32 1,  i32 4,  i32 3,  i32 5,  i32 4, i32 7,  i32 6,  i32 8,  i32 7,  i32 10, i32 9,  i32 11, i32 10, i32 17, i32 16, i32 18, i32 17, i32 20, i32 19, i32 21, i32 20, i32 23, i32 22, i32 24, i32 23, i32 26, i32 25, i32 27, i32 26, i32 33, i32 32, i32 34, i32 33, i32 36, i32 35, i32 37, i32 36, i32 39, i32 38, i32 40, i32 39, i32 42, i32 41, i32 43, i32 42, i32 49, i32 48, i32 50, i32 49, i32 52, i32 51, i32 53, i32 52, i32 55, i32 54, i32 56, i32 55, i32 58, i32 57, i32 59, i32 58>
  store <64 x i8> %shuffle2, ptr %dst, align 512
  ret void
}

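; Pairing each source byte pair with two zero bytes (index 84 selects the lone zero element of the second operand) is an i16->i32 zero extension, so all subtargets emit a single vpmovzxwd.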
define <64 x i8> @shuffle_v32i16_zextinreg_to_v16i32(<64 x i8> %a)  {
; ALL-LABEL: shuffle_v32i16_zextinreg_to_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    retq
  %b = shufflevector <64 x i8> %a, <64 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <64 x i32> <i32 0, i32 1, i32 84, i32 84, i32 2, i32 3, i32 84, i32 84, i32 4, i32 5, i32 84, i32 84, i32 6, i32 7, i32 84, i32 84, i32 8, i32 9, i32 84, i32 84, i32 10, i32 11, i32 84, i32 84, i32 12, i32 13, i32 84, i32 84, i32 14, i32 15, i32 84, i32 84, i32 16, i32 17, i32 84, i32 84, i32 18, i32 19, i32 84, i32 84, i32 20, i32 21, i32 84, i32 84, i32 22, i32 23, i32 84, i32 84, i32 24, i32 25, i32 84, i32 84, i32 26, i32 27, i32 84, i32 84, i32 28, i32 29, i32 84, i32 84, i32 30, i32 31, i32 84, i32 84>
  ret <64 x i8> %b
}
